$ ps -ax
PID TTY STAT TIME COMMAND
1 ? Ss 3:28 /lib/systemd/systemd --system --deserialize 55 splash
2 ? S 0:01 [kthreadd]
3 ? S 0:00 [pool_workqueue_release]
/*
 * start_kernel() as shown in this walkthrough: only its final call,
 * rest_init(), appears here.
 */
void start_kernel(void)
{
	/* rest_init is the last step of start_kernel */

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();
}
/*
 * Spawn the first two tasks and then turn the calling context into the
 * boot idle thread.
 *
 * Sequence, in the order the code performs it:
 *   1. create the init task running kernel_init();
 *   2. pin that task to the current CPU;
 *   3. create the kthreadd task and record it in kthreadd_task;
 *   4. set system_state to SYSTEM_SCHEDULING and complete kthreadd_done;
 *   5. call schedule_preempt_disabled() once, then cpu_startup_entry().
 *
 * Declared __noreturn: control is handed to cpu_startup_entry() at the end.
 */
static noinline void __ref __noreturn rest_init(void)
{
	struct task_struct *tsk;
	int pid;

	rcu_scheduler_starting();
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	pid = user_mode_thread(kernel_init, NULL, CLONE_FS);
	/*
	 * Pin init on the boot CPU. Task migration is not properly working
	 * until sched_init_smp() has been run. It will set the allowed
	 * CPUs for init to the non isolated CPUs.
	 */
	rcu_read_lock();
	/* Look up the task_struct of the init task just created. */
	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
	tsk->flags |= PF_NO_SETAFFINITY;
	set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
	rcu_read_unlock();

	numa_default_policy();
	/* Second task: kthreadd; 'pid' is reused for its pid. */
	pid = kernel_thread(kthreadd, NULL, NULL, CLONE_FS | CLONE_FILES);
	rcu_read_lock();
	/* Publish kthreadd's task_struct in the global kthreadd_task. */
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();

	/*
	 * Enable might_sleep() and smp_processor_id() checks.
	 * They cannot be enabled earlier because with CONFIG_PREEMPTION=y
	 * kernel_thread() would trigger might_sleep() splats. With
	 * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
	 * already, but it's stuck on the kthreadd_done completion.
	 */
	system_state = SYSTEM_SCHEDULING;

	/* Signal kthreadd_done now that kthreadd_task has been set above. */
	complete(&kthreadd_done);

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	schedule_preempt_disabled();
	/* Call into cpu_idle with preempt disabled */
	cpu_startup_entry(CPUHP_ONLINE);
}
/*
 * Create a user mode thread.
 *
 * @fn:    entry point the new thread runs.
 * @arg:   opaque argument handed to @fn.
 * @flags: clone flags; the bits covered by CSIGNAL are split off and used
 *         as the exit signal, the remaining bits are passed on as clone
 *         flags with CLONE_VM and CLONE_UNTRACED always added.
 *
 * Returns whatever kernel_clone() returns.
 */
pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
	/* Low bits selected by CSIGNAL carry the exit signal. */
	const unsigned long exit_sig = flags & CSIGNAL;
	/* Everything else is a clone flag; force VM sharing and untraced. */
	const unsigned long clone_bits =
		(flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL;

	struct kernel_clone_args args = {
		.flags		= clone_bits,
		.exit_signal	= exit_sig,
		.fn		= fn,
		.fn_arg		= arg,
	};

	return kernel_clone(&args);
}
staticint __init init_setup(char *str){unsigned int i;execute_command = str;/** In case LILO is going to boot us with default command line,* it prepends "auto" before the whole cmdline which makes* the shell think it should execute a script with such name.* So we ignore all arguments entered _before_ init=... [MJ]*/for (i = 1; i < MAX_INIT_ARGS; i++)argv_init[i] = NULL;return 1;}__setup("init=", init_setup);staticint __init rdinit_setup(char *str){unsigned int i;ramdisk_execute_command = str;ramdisk_execute_command_set = true;/* See "auto" comment in init_setup */for (i = 1; i < MAX_INIT_ARGS; i++)argv_init[i] = NULL;return 1;}__setup("rdinit=", rdinit_setup);staticint __ref kernel_init(void *unused){int ret;......if (ramdisk_execute_command) {ret = run_init_process(ramdisk_execute_command);if (!ret)return 0;pr_err("Failed to execute %s (error %d)\n",ramdisk_execute_command, ret);}/** We try each of these until one succeeds.** The Bourne shell can be used instead of init if we are* trying to recover a really broken machine.*/if (execute_command) {ret = run_init_process(execute_command);if (!ret)return 0;panic("Requested init %s failed (error %d).",execute_command, ret);}......}
run_init_process如下:
staticintrun_init_process(constchar *init_filename){const char *const *p;argv_init[0] = init_filename;pr_info("Run %s as init process\n", init_filename);pr_debug(" with arguments:\n");for (p = argv_init; *p; p++)pr_debug(" %s\n", *p);pr_debug(" with environment:\n");for (p = envp_init; *p; p++)pr_debug(" %s\n", *p);return kernel_execve(init_filename, argv_init, envp_init);}
intkthreadd(void *unused){static const char comm[TASK_COMM_LEN] = "kthreadd";struct task_struct *tsk = current;/* Setup a clean context for our children to inherit. */set_task_comm(tsk, comm);ignore_signals(tsk);set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));set_mems_allowed(node_states[N_MEMORY]);current->flags |= PF_NOFREEZE;cgroup_init_kthreadd();for (;;) {set_current_state(TASK_INTERRUPTIBLE);if (list_empty(&kthread_create_list))schedule();__set_current_state(TASK_RUNNING);spin_lock(&kthread_create_lock);while (!list_empty(&kthread_create_list)) {struct kthread_create_info *create;create = list_entry(kthread_create_list.next,struct kthread_create_info, list);list_del_init(&create->list);spin_unlock(&kthread_create_lock);create_kthread(create);spin_lock(&kthread_create_lock);}spin_unlock(&kthread_create_lock);}return 0;}
//内核代码include/linux/kthread.h#define kthread_create(threadfn, data, namefmt, arg...) \kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)//内核测试module:drivers/dma/dmatest.cstaticintdmatest_add_threads(struct dmatest_info *info,struct dmatest_chan *dtc, enum dma_transaction_type type){......for (i = 0; i < params->threads_per_chan; i++) {thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);......thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",dma_chan_name(chan), op, i);......}return i;}
//kthread_create是kthread_create_on_node//kthread_create_on_node调用__kthread_create_on_nodestruct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),void *data, int node,const char namefmt[],va_list args){DECLARE_COMPLETION_ONSTACK(done);struct task_struct *task;struct kthread_create_info *create = kmalloc(sizeof(*create),GFP_KERNEL);if (!create)return ERR_PTR(-ENOMEM);create->threadfn = threadfn;create->data = data;create->node = node;create->done = &done;create->full_name = kvasprintf(GFP_KERNEL, namefmt, args);if (!create->full_name) {task = ERR_PTR(-ENOMEM);goto free_create;}spin_lock(&kthread_create_lock);list_add_tail(&create->list, &kthread_create_list);spin_unlock(&kthread_create_lock);wake_up_process(kthreadd_task);/** Wait for completion in killable state, for I might be chosen by* the OOM killer while kthreadd is trying to allocate memory for* new kernel thread.*/if (unlikely(wait_for_completion_killable(&done))) {/** If I was killed by a fatal signal before kthreadd (or new* kernel thread) calls complete(), leave the cleanup of this* structure to that thread.*/if (xchg(&create->done, NULL))return ERR_PTR(-EINTR);/** kthreadd (or new kernel thread) will call complete()* shortly.*/wait_for_completion(&done);}task = create->result;free_create:kfree(create);return task;}