linux中的等待队列
linux中的代码每一块深究了看,都有好多东西,在看printk代码时,你会发现syslog进行读操作时,如果ring buffer没有新的内容的话,进程会等待
而实现这个操作是通过linux中的等待队列实现的,等待队列据说还和信号量有关系,这个后面再看,今天研究下等待队列wait queue
/* Excerpt from do_syslog(): handling of the SYSLOG_ACTION_READ request. */
case SYSLOG_ACTION_READ: /* Read from log */
if (!buf || len < 0)
return -EINVAL;
if (!len)
return 0;
if (!access_ok(buf, len))
return -EFAULT;
// The caller sleeps here on log_wait until syslog_seq != log_next_seq;
// a non-zero result from the wait is returned to the caller unchanged.
error = wait_event_interruptible(log_wait, syslog_seq != log_next_seq);
if (error)
return error;
error = syslog_print(buf, len);
break;
数据结构
等待队列相关数据结构有两个,一个是等待队列头wait_queue_head,另一个是等待队列元素wait_queue_entry
/*
* A single wait-queue entry structure:
*/
struct wait_queue_entry {
unsigned int flags; // flag bits such as WQ_FLAG_EXCLUSIVE / WQ_FLAG_BOOKMARK
void *private; // the waiting task (init_wait_entry() stores `current` here)
wait_queue_func_t func; // wake-up callback invoked by the waker for this entry
struct list_head entry; // node that gets linked into wait_queue_head.head
};
// Head of a wait queue: a spinlock plus the list of waiting entries.
struct wait_queue_head {
spinlock_t lock; // __wake_up_common() asserts this lock is held while walking the list
struct list_head head; // doubly linked list of wait_queue_entry.entry nodes
};
typedef struct wait_queue_head wait_queue_head_t;
在使用时,一般会用DECLARE_WAIT_QUEUE_HEAD宏来初始化一个等待队列头
/*
 * Static initializer for a wait_queue_head named `name`.
 * The lock is set up with __SPIN_LOCK_UNLOCKED, and both pointers of `head`
 * refer to the head itself, so the list contains only the head node
 * (i.e. the queue starts out with no waiters).
 */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.head = { &(name).head, &(name).head } }
/* Define a wait_queue_head called `name`, initialized through __WAIT_QUEUE_HEAD_INITIALIZER. */
#define DECLARE_WAIT_QUEUE_HEAD(name) \
struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
用DECLARE_WAITQUEUE来初始化队列元素,不过这个一般用不到显式调用,后面会讲
/*
 * Static initializer for a wait_queue_entry that belongs to task `tsk`.
 *
 * .private is set to `tsk`, .func to default_wake_function, and the list
 * node is left unlinked (both pointers NULL). .flags is not named and is
 * therefore zero-initialized.
 *
 * Annotations inside a multi-line macro must use the block-comment form:
 * a line comment placed before a line-continuation backslash would extend
 * onto the following line and silently remove it from the macro body.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) { \
    .private = tsk, \
    .func = default_wake_function, /* default wake-up callback */ \
    .entry = { NULL, NULL } }

/* Define a wait_queue_entry called `name` for task `tsk`. */
#define DECLARE_WAITQUEUE(name, tsk) \
    struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk)
睡眠与唤醒
让我们再次看一下printk.c中do_syslog中的SYSLOG_ACTION_READ代码,发现其睡眠等待是调用wait_event_interruptible函数即可
其唤醒是在wake_up_klogd_work_func函数里调用的wake_up_interruptible。okay,看一下这两个函数的实现
睡眠
/*
 * wait_event_interruptible - wait on `wq_head` until `condition` is true.
 *
 * Evaluates to 0 without waiting when `condition` already holds; otherwise
 * evaluates to whatever __wait_event_interruptible(wq_head, condition)
 * yields. Note that `condition` is evaluated here and again inside the
 * wait loop, so it must be safe to evaluate more than once.
 */
#define wait_event_interruptible(wq_head, condition) \
({ \
int __ret = 0; \
might_sleep(); \
if (!(condition)) \
__ret = __wait_event_interruptible(wq_head, condition); \
__ret; \
})
很容易理解,如果condition不成立的话,则调用__wait_event_interruptible,接着进去看的话发现重头戏在___wait_event函数式宏
在___wait_event里,会新定义一个等待队列元素wait_queue_entry,通过init_wait_entry初始化,然后在prepare_to_wait_event里将其加入等待队列头wq_head的链表里
并且判断condition是否成立以及进程是否有信号待处理,如果有的话则跳出,否则就调用schedule直至下次被调度到
/*
 * Slow path of wait_event_interruptible(): expands ___wait_event() with
 * state = TASK_INTERRUPTIBLE, exclusive = 0, initial result = 0, and
 * schedule() as the command run on every loop iteration.
 */
#define __wait_event_interruptible(wq_head, condition) \
___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \
schedule())
/*
 * ___wait_event - core wait loop used by the wait_event*() macros.
 * @wq_head:   wait queue head to wait on (passed by name, not by pointer)
 * @condition: expression re-evaluated on every loop iteration
 * @state:     task state handed to prepare_to_wait_event()
 * @exclusive: non-zero to initialize the entry with WQ_FLAG_EXCLUSIVE
 * @ret:       initial value of the result
 * @cmd:       statement executed each time the condition is still false
 *             (schedule() for wait_event_interruptible)
 *
 * Evaluates to @ret once @condition becomes true, or to the non-zero value
 * returned by prepare_to_wait_event() when the wait is interruptible.
 *
 * All annotations inside the macro body use block comments on purpose: a
 * line comment in front of a line-continuation backslash would swallow the
 * rest of the macro.
 */
#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd) \
({ \
    __label__ __out; \
    struct wait_queue_entry __wq_entry; \
    long __ret = ret; /* explicit shadow */ \
    \
    init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \
    for (;;) { \
        long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state); \
        \
        /* The condition holds: leave the loop. */ \
        if (condition) \
            break; \
        \
        /* \
         * Interruptible wait with a pending signal: bail out with the \
         * value reported by prepare_to_wait_event(). Waiters that come \
         * in through wait_event_interruptible() are interruptible. \
         */ \
        if (___wait_is_interruptible(state) && __int) { \
            __ret = __int; \
            goto __out; \
        } \
        \
        /* Run schedule() (or the caller's cmd) instead of spinning. */ \
        cmd; \
    } \
    /* Take the entry off the wait queue again. */ \
    finish_wait(&wq_head, &__wq_entry); \
__out: __ret; \
})
再看一下init_wait_entry函数对wait_queue_entry初始化
/*
 * init_wait_entry - set up a wait-queue entry for the calling task.
 * @wq_entry: entry to initialize
 * @flags:    initial value for wq_entry->flags (___wait_event() passes
 *            WQ_FLAG_EXCLUSIVE or 0)
 */
void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
wq_entry->flags = flags;
wq_entry->private = current; // the current task, i.e. the owner of this entry
wq_entry->func = autoremove_wake_function; // wake-up callback installed for entries set up here
INIT_LIST_HEAD(&wq_entry->entry);
}
EXPORT_SYMBOL(init_wait_entry);
唤醒
与wait_event_interruptible对应的唤醒函数是wake_up_interruptible,其调用流程里最重要的是__wake_up_common函数
// Wake-up counterpart of wait_event_interruptible(): calls __wake_up() with
// mode TASK_INTERRUPTIBLE. Note the third argument: nr_exclusive is 1.
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
/**
* __wake_up - wake up threads blocked on a waitqueue.
* @wq_head: the waitqueue
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
*                (wake_up_interruptible() passes 1 here)
* @key: is directly passed to the wakeup function
*
* If this function wakes up a task, it executes a full memory barrier before
* accessing the task state.
*
* Thin wrapper: forwards all arguments to __wake_up_common_lock(), passing 0
* as the fourth argument (presumably wake_flags; that signature is not shown
* in this excerpt).
*/
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, void *key)
{
__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
__wake_up_common函数中比较难以理解的是bookmark的作用,以及从__wake_up中传过来的值nr_exclusive的作用
bookmark这一entry的作用是为了防止唤醒这一操作持有锁过久,通过与WAITQUEUE_WALK_BREAK_CNT配合,在一次遍历扫描的entry个数超过该阈值后,暂时释放锁一段时间,之后再从bookmark处继续
nr_exclusive的作用是:有些等待项带有WQ_FLAG_EXCLUSIVE标志,该标志意味着进程想要被独占地唤醒,而nr_exclusive就是用来控制一次最多唤醒多少个这样的独占进程
/*
* Scan threshold to break wait queue walk.
* This allows a waker to take a break from holding the
* wait queue lock during the wait queue walk.
*
* __wake_up_common() stops its walk and leaves a bookmark once it has
* processed more than this many entries (only when a bookmark was supplied
* and entries remain).
*/
#define WAITQUEUE_WALK_BREAK_CNT 64
/*
 * __wake_up_common - walk a wait queue and invoke each entry's wake callback.
 * @wq_head:      wait queue to walk; its lock must already be held
 * @mode:         passed through to each entry's callback
 * @nr_exclusive: budget of exclusive (WQ_FLAG_EXCLUSIVE) waiters to wake
 * @wake_flags:   passed through to each entry's callback
 * @key:          passed through to each entry's callback
 * @bookmark:     optional marker entry used to interrupt and later resume a
 *                long walk; may be NULL
 *
 * Returns the remaining nr_exclusive budget. When the walk is cut short
 * because of WAITQUEUE_WALK_BREAK_CNT, @bookmark is left linked in the list
 * with WQ_FLAG_BOOKMARK set so that a later call can continue from there.
 */
static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, int wake_flags, void *key,
wait_queue_entry_t *bookmark)
{
wait_queue_entry_t *curr, *next;
int cnt = 0;
lockdep_assert_held(&wq_head->lock);
if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) { // resuming: the bookmark is currently linked in the list
curr = list_next_entry(bookmark, entry); // continue with the entry right after the bookmark
list_del(&bookmark->entry); // unlink the bookmark from the list
bookmark->flags = 0;
} else
curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry); // fresh walk: start at the first entry
// Nothing (left) to walk: curr is the list head itself.
if (&curr->entry == &wq_head->head)
return nr_exclusive;
list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
unsigned flags = curr->flags;
int ret;
if (flags & WQ_FLAG_BOOKMARK) // skip entries flagged WQ_FLAG_BOOKMARK; they are markers, not waiters
continue;
ret = curr->func(curr, mode, wake_flags, key); // invoke the entry's wake-up callback
if (ret < 0)
break;
// A non-zero result for an exclusive entry consumes one unit of
// nr_exclusive; the walk stops only once the budget reaches zero.
if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
// More than WAITQUEUE_WALK_BREAK_CNT entries processed and entries remain:
// flag the bookmark, link it in front of `next` (list_add_tail() inserts
// before the given node), and stop so the walk can be resumed from there.
if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
(&next->entry != &wq_head->head)) {
bookmark->flags = WQ_FLAG_BOOKMARK;
list_add_tail(&bookmark->entry, &next->entry);
break;
}
}
return nr_exclusive;
}