linux中的代码每一块深究了看,都有好多东西,在看printk代码时,你会发现syslog进行读操作时,如果ring buffer没有新的内容的话,进程会等待
而实现这个操作是通过linux中的等待队列实现的,等待队列据说还和信号量有关系,这个后面再看,今天研究下等待队列wait queue

case SYSLOG_ACTION_READ:        /* Read from log */
        if (!buf || len < 0)
                return -EINVAL;
        if (!len)
                return 0;
        if (!access_ok(buf, len))
                return -EFAULT;
        /*
         * Sleep on log_wait until syslog_seq != log_next_seq; a non-zero
         * result from the wait is returned to the caller unchanged.
         */
        error = wait_event_interruptible(log_wait, syslog_seq != log_next_seq);
        if (error)
                return error;
        error = syslog_print(buf, len);
        break;

数据结构

等待队列相关的数据结构有两个,一个是等待队列头wait_queue_head,另一个是等待队列元素wait_queue_entry

/*
 * A single wait-queue entry structure:
 */
struct wait_queue_entry {
        unsigned int            flags;     /* WQ_FLAG_* bits, e.g. WQ_FLAG_EXCLUSIVE, WQ_FLAG_BOOKMARK */
        void                    *private;  /* the waiting task; init_wait_entry() stores current here */
        wait_queue_func_t       func;      /* wake callback, invoked by __wake_up_common() */
        struct list_head        entry;     /* list node linked into wait_queue_head.head */
};

/*
 * Head of a wait queue: a lock plus the list of entries waiting on it.
 */
struct wait_queue_head {
        spinlock_t              lock;      /* __wake_up_common() asserts this is held while walking head */
        struct list_head        head;      /* list of wait_queue_entry.entry nodes */
};
typedef struct wait_queue_head wait_queue_head_t;

在使用时,一般会用DECLARE_WAIT_QUEUE_HEAD宏来初始化一个等待队列头

/*
 * Static initializer for a wait_queue_head called @name: the lock starts
 * unlocked and both pointers of the list head refer to the head itself,
 * i.e. the queue contains no entries.
 */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {                                   \
        .lock           = __SPIN_LOCK_UNLOCKED(name.lock),                      \
        .head           = { &(name).head, &(name).head } }    /* next and prev both point back at the head */

/*
 * Define a struct wait_queue_head variable called @name and give it its
 * initial value through __WAIT_QUEUE_HEAD_INITIALIZER().
 */
#define DECLARE_WAIT_QUEUE_HEAD(name) \
        struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)

用DECLARE_WAITQUEUE来定义并初始化队列元素,不过这个一般用不到显式调用,后面会讲

/*
 * Static initializer for a wait_queue_entry.
 *
 * @name: name of the variable being initialized (not used by the expansion)
 * @tsk:  value stored in .private, i.e. the task that owns the entry
 *
 * .flags is not listed and is therefore zero-initialized.
 *
 * Annotations inside the macro must be block comments: a line comment placed
 * in front of a line-continuation backslash would absorb the next line and
 * drop the .entry initializer together with the closing brace.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) {                                    \
        .private        = tsk,                                                  \
        .func           = default_wake_function,   /* default wake callback */  \
        .entry          = { NULL, NULL } }         /* not linked into any queue yet */

/*
 * Define a struct wait_queue_entry variable called @name, initialized through
 * __WAITQUEUE_INITIALIZER() with @tsk as its owning task.
 */
#define DECLARE_WAITQUEUE(name, tsk)                                            \
        struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk)

睡眠与唤醒

让我们再次看一下printk.c中do_syslog中的SYSLOG_ACTION_READ代码,发现其睡眠等待只需调用wait_event_interruptible宏即可
其唤醒是在wake_up_klogd_work_func函数里调用的wake_up_interruptible。okay,看一下这两个宏的实现

睡眠

/*
 * Wait on @wq_head until @condition is true.
 *
 * Evaluates to 0 when @condition already holds on entry; otherwise to the
 * result of __wait_event_interruptible(). might_sleep() is called first in
 * either case.
 */
#define wait_event_interruptible(wq_head, condition)                            \
({                                                                              \
        int __ret = 0;                                                          \
        might_sleep();                                                          \
        if (!(condition))                                                       \
                __ret = __wait_event_interruptible(wq_head, condition);         \
        __ret;                                                                  \
})

很容易理解,如果condition不成立的话,则调用__wait_event_interruptible,接着进去看的话发现重头戏在___wait_event这个函数式宏(注意是三个下划线)
在___wait_event里,会新定义一个等待队列元素wait_queue_entry,通过init_wait_entry初始化,然后在prepare_to_wait_event里将其加入等待队列头wq_head的链表里
接着判断condition是否成立:成立则break跳出循环;否则若进程有信号待处理,则直接跳到__out返回;两者都不是就调用schedule让出CPU,直到被唤醒后再次检查

/*
 * Slow path of wait_event_interruptible(): run the generic ___wait_event()
 * loop with state TASK_INTERRUPTIBLE, exclusive = 0, initial result 0, and
 * schedule() as the command executed on every iteration that keeps waiting.
 */
#define __wait_event_interruptible(wq_head, condition)                          \
        ___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,             \
                      schedule())

/*
 * Generic wait loop behind the wait_event*() macros.
 *
 * @wq_head:   wait queue head to wait on (the macro takes its address)
 * @condition: expression re-evaluated after every prepare_to_wait_event() call
 * @state:     task state handed to prepare_to_wait_event()
 * @exclusive: non-zero to initialize the entry with WQ_FLAG_EXCLUSIVE
 * @ret:       initial value of the macro's result
 * @cmd:       statement executed on every iteration that keeps waiting
 *
 * Evaluates to @ret when the loop ends because @condition became true, or to
 * the non-zero value returned by prepare_to_wait_event() when
 * ___wait_is_interruptible(state) holds.
 *
 * Annotations inside the macro must be block comments: a line comment placed
 * in front of a line-continuation backslash absorbs the following lines and
 * truncates the macro body.
 */
#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd)           \
({                                                                              \
        __label__ __out;                                                        \
        struct wait_queue_entry __wq_entry;                                     \
        long __ret = ret;       /* explicit shadow */                           \
                                                                                \
        init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);        \
        for (;;) {                                                              \
                long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\
                                                                                \
                if (condition)   /* condition holds: leave the loop */          \
                        break;                                                  \
                /*                                                              \
                 * Interruptible state and prepare_to_wait_event() returned     \
                 * non-zero: give up and return that value.                     \
                 * __wait_event_interruptible passes TASK_INTERRUPTIBLE as      \
                 * state. This path bypasses finish_wait(); presumably          \
                 * prepare_to_wait_event() has already unlinked the entry in    \
                 * that case (confirm in kernel/sched/wait.c).                  \
                 */                                                             \
                if (___wait_is_interruptible(state) && __int) {                 \
                        __ret = __int;                                          \
                        goto __out;                                             \
                }                                                               \
                                                                                \
                cmd; /* e.g. schedule(): yield the CPU instead of spinning */   \
        }                                                                       \
        finish_wait(&wq_head, &__wq_entry); /* done waiting: clean up the entry */ \
__out:  __ret;                                                                  \
})

再看一下init_wait_entry函数对wait_queue_entry初始化

/*
 * Initialize a wait queue entry for the calling task.
 *
 * @wq_entry: entry to initialize
 * @flags:    initial value of wq_entry->flags (___wait_event() passes
 *            WQ_FLAG_EXCLUSIVE or 0)
 */
void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
        wq_entry->flags = flags;
        wq_entry->private = current;                 /* the current task owns this entry */
        /*
         * Note: this is autoremove_wake_function, not the default_wake_function
         * used by __WAITQUEUE_INITIALIZER. Going by its name it is expected to
         * also unlink the entry after a successful wake-up; confirm in
         * kernel/sched/wait.c.
         */
        wq_entry->func = autoremove_wake_function;
        INIT_LIST_HEAD(&wq_entry->entry);
}
EXPORT_SYMBOL(init_wait_entry);

唤醒

与wait_event_interruptible对应的唤醒函数是wake_up_interruptible,其调用流程里最重要的是__wake_up_common函数

/*
 * Wake waiters on queue x via __wake_up() with mode TASK_INTERRUPTIBLE,
 * nr_exclusive = 1 (the third argument) and key = NULL.
 */
#define wake_up_interruptible(x)        __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)


/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 *                (wake_up_interruptible() passes 1 here)
 * @key: is directly passed to the wakeup function
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
                        int nr_exclusive, void *key)
{
        /* Forward everything unchanged; the literal 0 is presumably the wake_flags argument. */
        __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}

__wake_up_common函数中比较难以理解的是bookmark的作用,以及从wake_up_interruptible经__wake_up一路传下来的nr_exclusive的作用
bookmark这一entry的作用是为了防止唤醒操作持有锁过久:通过与WAITQUEUE_WALK_BREAK_CNT配合,在一次遍历处理的entry超过该阈值后,把bookmark插到下一个待处理entry之前并退出,让调用者可以暂时释放锁,之后再从bookmark处继续

nr_exclusive的作用是:有些等待项带有WQ_FLAG_EXCLUSIVE标志,该标志意味着进程想要被独占地唤醒,而nr_exclusive就是控制一次唤醒中独占进程的个数


/*
 * Scan threshold to break wait queue walk.
 * This allows a waker to take a break from holding the
 * wait queue lock during the wait queue walk.
 *
 * __wake_up_common() compares its per-call entry counter against this value
 * before parking the bookmark and stopping the walk.
 */
#define WAITQUEUE_WALK_BREAK_CNT 64

/*
 * Walk the wait queue and invoke each entry's wake callback.
 *
 * @wq_head:      queue to walk; wq_head->lock must be held by the caller
 * @mode:         passed through to every wake callback
 * @nr_exclusive: budget of WQ_FLAG_EXCLUSIVE entries; the walk stops when a
 *                successful exclusive wake-up decrements it to zero
 * @wake_flags:   passed through to every wake callback
 * @key:          passed through to every wake callback
 * @bookmark:     optional placeholder entry used to suspend and later resume
 *                a long walk; may be NULL
 *
 * Returns the remaining value of @nr_exclusive.
 */
static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
                        int nr_exclusive, int wake_flags, void *key,
                        wait_queue_entry_t *bookmark)
{
        wait_queue_entry_t *curr, *next;
        int cnt = 0;

        lockdep_assert_held(&wq_head->lock);

        if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
                /* Resuming an earlier walk: continue from the entry after the bookmark. */
                curr = list_next_entry(bookmark, entry);

                /* Unlink the bookmark from the list and clear its flag. */
                list_del(&bookmark->entry);
                bookmark->flags = 0;
        } else
                /* Fresh walk: start at the first entry of the queue. */
                curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);

        /* curr is the list head itself: nothing (left) to process. */
        if (&curr->entry == &wq_head->head)
                return nr_exclusive;

        list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
                unsigned flags = curr->flags;
                int ret;

                /* Entries flagged WQ_FLAG_BOOKMARK are skipped; no wake callback is called for them. */
                if (flags & WQ_FLAG_BOOKMARK)
                        continue;

                /* Invoke this entry's wake callback. */
                ret = curr->func(curr, mode, wake_flags, key);
                if (ret < 0)
                        break;
                /*
                 * A non-zero result on an exclusive entry consumes one unit of
                 * nr_exclusive; stop only once the budget reaches zero.
                 */
                if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                        break;

                /*
                 * cnt counts entries processed in this call (not only those
                 * actually woken). Once it exceeds WAITQUEUE_WALK_BREAK_CNT and
                 * more entries remain, flag the bookmark, insert it immediately
                 * before the next entry (list_add_tail() adds before the given
                 * node) and stop, so a later call resumes from there.
                 */
                if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
                                (&next->entry != &wq_head->head)) {
                        bookmark->flags = WQ_FLAG_BOOKMARK;
                        list_add_tail(&bookmark->entry, &next->entry);
                        break;
                }
        }

        return nr_exclusive;
}