fork/clone時に返すpidの設定(1) - φ(・・*)ゞｳｰﾝ　カーネルとか弄ったりのメモ

fork/clone時にpidがどのように決まるのか見てみます。見ているのはカーネル v4.1です。今回はどういう流れで処理されているかを見ていきます。

pidの設定は数カ所でやっていて、copy_process()、copy_namespaces()、そしてdo_fork()内でcopy_process()を呼び出した後にあります。

まずはpidを知る上で重要なpid構造体を見ておきます。pid構造体はinclude/linux/pid.hにて定義されています。

 57 struct pid
 58 {
 59         atomic_t count;
 60         unsigned int level;
 61         /* lists of tasks that use this pid */
 62         struct hlist_head tasks[PIDTYPE_MAX];
 63         struct rcu_head rcu;
 64         struct upid numbers[1];
 65 };
 66

upid構造体はこのようになっています。コメントからこの構造体がpidの主要な構造というのが読めます。

 44 /*
 45  * struct upid is used to get the id of the struct pid, as it is
 46  * seen in particular namespace. Later the struct pid is found with
 47  * find_pid_ns() using the int nr and struct pid_namespace *ns.
 48  */
 49
 50 struct upid {
 51         /* Try to keep pid_chain in the same cacheline as nr for find_vpid */
 52         int nr;
 53         struct pid_namespace *ns;
 54         struct hlist_node pid_chain;
 55 };
 56

また、pid構造体のtasks配列のindex番号として、以下のenumが定義されています。

  6 enum pid_type
  7 {
  8         PIDTYPE_PID,
  9         PIDTYPE_PGID,
 10         PIDTYPE_SID,
 11         PIDTYPE_MAX
 12 };

では、copy_process()内での処理から見ていきます。

その前に、copy_process()のI/Fはこうなっていて、5番目の引数にはpid構造体が渡されます。

1239 static struct task_struct *copy_process(unsigned long clone_flags,
1240                                         unsigned long stack_start,
1241                                         unsigned long stack_size,
1242                                         int __user *child_tidptr,
1243                                         struct pid *pid,
1244                                         int trace)
1245 {

do_fork()からはpid構造体はNULLで渡します。

1705         p = copy_process(clone_flags, stack_start, stack_size,
1706                          child_tidptr, NULL, trace);

そして、copy_process()で最初にpidが出てくるのはここです。pidはcopy_process()の引数の変数なのでdo_fork()から呼ばれた場合はNULLです。NULLは&init_struct_pidとは一致しないので、if文の中に入ってalloc_pid()が呼ばれます。ここで呼んでいるalloc_pid()でpid構造体を初期化します。ここでpid番号が決まります。今回はここの処理はそういうものだというところに留めて先に進みます。ここは次回読んでいきます。

1454         if (pid != &init_struct_pid) {
1455                 pid = alloc_pid(p->nsproxy->pid_ns_for_children);
1456                 if (IS_ERR(pid)) {
1457                         retval = PTR_ERR(pid);
1458                         goto bad_fork_cleanup_io;
1459                 }
1460         }

次はこの部分です。

1495         /* ok, now we should be set up.. */
1496         p->pid = pid_nr(pid);

ここで、pはtask_struct構造体です。p->pidはpid構造体ではなくpid_t型（整数型）の変数です。

1393         pid_t pid;

pid_nr()はinclude/linux/pid.hで定義されています。pidは先程初期化したものなので0以外の番号が返るはずです。

164 static inline pid_t pid_nr(struct pid *pid)
165 {
166         pid_t nr = 0;
167         if (pid)
168                 nr = pid->numbers[0].nr;
169         return nr;
170 }

pid_nr()によってp->pidが決まりました。そして次ですが、clone(2)でスレッド生成する場合かどうかでtgidの設定方法が変わっています。スレッド生成時はカレントプロセスのtgidを引き継ぎ、新しくプロセス生成している場合はtgidはpid_nr()の返り値が設定されます。

1497         if (clone_flags & CLONE_THREAD) {
1498                 p->exit_signal = -1;
1499                 p->group_leader = current->group_leader;
1500                 p->tgid = current->tgid;
1501         } else {
1502                 if (clone_flags & CLONE_PARENT)
1503                         p->exit_signal = current->group_leader->exit_signal;
1504                 else
1505                         p->exit_signal = (clone_flags & CSIGNAL);
1506                 p->group_leader = p;
1507                 p->tgid = p->pid;
1508         }

copy_process()で次にpidが関連するのはこの部分です。p->pidが0になるのはidle processの生成時（pidとして&init_struct_pidが渡された場合）で、そのときはこのifブロックは実行されません。通常のfork/cloneではブロック内が実行されますが、詳細は今回は飛ばします。

1557         if (likely(p->pid)) {
1558                 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
1559 
1560                 init_task_pid(p, PIDTYPE_PID, pid);
1561                 if (thread_group_leader(p)) {
1562                         init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
1563                         init_task_pid(p, PIDTYPE_SID, task_session(current));
1564 
1565                         if (is_child_reaper(pid)) {
1566                                 ns_of_pid(pid)->child_reaper = p;
1567                                 p->signal->flags |= SIGNAL_UNKILLABLE;
1568                         }
1569 
1570                         p->signal->leader_pid = pid;
1571                         p->signal->tty = tty_kref_get(current->signal->tty);
1572                         list_add_tail(&p->sibling, &p->real_parent->children);
1573                         list_add_tail_rcu(&p->tasks, &init_task.tasks);
1574                         attach_pid(p, PIDTYPE_PGID);
1575                         attach_pid(p, PIDTYPE_SID);
1576                         __this_cpu_inc(process_counts);
1577                 } else {
1578                         current->signal->nr_threads++;
1579                         atomic_inc(&current->signal->live);
1580                         atomic_inc(&current->signal->sigcnt);
1581                         list_add_tail_rcu(&p->thread_group,
1582                                           &p->group_leader->thread_group);
1583                         list_add_tail_rcu(&p->thread_node,
1584                                           &p->signal->thread_head);
1585                 }
1586                 attach_pid(p, PIDTYPE_PID);
1587                 nr_threads++;
1588         }

do_fork()ではcopy_process()の呼出し後にpidの設定があります。設定するのはもちろんcopy_process()が成功した場合です。

1711         if (!IS_ERR(p)) {
1712                 struct completion vfork;
1713                 struct pid *pid;
1714 
1715                 trace_sched_process_fork(current, p);
1716 
1717                 pid = get_task_pid(p, PIDTYPE_PID);
1718                 nr = pid_vnr(pid);
1719 
1720                 if (clone_flags & CLONE_PARENT_SETTID)
1721                         put_user(nr, parent_tidptr);
1722 
1723                 if (clone_flags & CLONE_VFORK) {
1724                         p->vfork_done = &vfork;
1725                         init_completion(&vfork);
1726                         get_task_struct(p);
1727                 }
1728 
1729                 wake_up_new_task(p);
1730 
1731                 /* forking complete and child started to run, tell ptracer */
1732                 if (unlikely(trace))
1733                         ptrace_event_pid(trace, pid);
1734 
1735                 if (clone_flags & CLONE_VFORK) {
1736                         if (!wait_for_vfork_done(p, &vfork))
1737                                 ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
1738                 }
1739 
1740                 put_pid(pid);
1741         } else {
1742                 nr = PTR_ERR(p);
1743         }

最初にget_task_pid()でpid構造体を取得します。まずtypeのチェックで、do_fork()ではPIDTYPE_PIDを渡しています。taskは生成中のtask_structです。typeがPIDTYPE_PID以外の場合はtaskがtaskのグループリーダに置き換わります。

465 struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
466 {
467         struct pid *pid;
468         rcu_read_lock();
469         if (type != PIDTYPE_PID)
470                 task = task->group_leader;
471         pid = get_pid(task->pids[type].pid);
472         rcu_read_unlock();
473         return pid;

次にget_pid()でpid構造体の参照数を増やします。

 75 static inline struct pid *get_pid(struct pid *pid)
 76 {
 77         if (pid)
 78                 atomic_inc(&pid->count);
 79         return pid;
 80 }

get_pid()に渡したのはtask_structにあるこの構造体です。

1422         /* PID/PID hash table linkage. */
1423         struct pid_link pids[PIDTYPE_MAX];

 69 struct pid_link
 70 {
 71         struct hlist_node node;
 72         struct pid *pid;
 73 };

pidsはidle processの初期化時にinit_idle_pids()にて設定されています。

1649 static inline void init_idle_pids(struct pid_link *links)
1650 {
1651         enum pid_type type;
1652 
1653         for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
1654                 INIT_HLIST_NODE(&links[type].node); /* not really needed */
1655                 links[type].pid = &init_struct_pid;
1656         }
1657 }
1658

なので、get_pid()に渡したのはPIDTYPE_PIDがtypeのpid構造体になり、この構造体の参照カウントを増やしています。

do_fork()に戻って、以下の部分のnrがdo_fork()の返り値です（fork(2)、clone(2)の返り値になるpidです）。なので本当にpidが決まるのはここですね。

1718                 nr = pid_vnr(pid);

pid_vnr()はこうです。task_active_pid_ns()は引数で渡されたプロセスのpid名前空間を取得します。

515 pid_t pid_vnr(struct pid *pid)
516 {
517         return pid_nr_ns(pid, task_active_pid_ns(current));
518 }

名前空間の取得にns_of_pid()とtask_pid()を使います。

546 struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
547 {
548         return ns_of_pid(task_pid(tsk));
549 }

task_pid()はpids配列からtypeがPIDのpid構造体を返します。

1767 static inline struct pid *task_pid(struct task_struct *task)
1768 {
1769         return task->pids[PIDTYPE_PID].pid;
1770 }

pid名前空間はupid構造体にあるので、levelをindexとして対象のupid構造体を選択し、その名前空間を返します。

134 static inline struct pid_namespace *ns_of_pid(struct pid *pid)
135 {
136         struct pid_namespace *ns = NULL;
137         if (pid)
138                 ns = pid->numbers[pid->level].ns;
139         return ns;
140 }

これでやっとpid_nr_ns()の2番目の引数が決まったので本題のpid_nr_ns()を見ていきます。まず、pid構造体がnullではないことと、名前空間の階層がpid構造体の階層より大きくないかチェックしています。pid名前空間はuts名前空間とは違い、リソースががらっと変わるわけではないのでこのようになっています。

処理としては名前空間に設定されているlevelをindexとしてpid構造体のupid構造体を取得します。これで取得したupid構造体に設定されている名前空間がtask_active_pid_ns()で取得したプロセスの名前空間と一致すれば、この構造体に設定されているpid番号のnrを返します。

501 pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
502 {
503         struct upid *upid;
504         pid_t nr = 0;
505 
506         if (pid && ns->level <= pid->level) {
507                 upid = &pid->numbers[ns->level];
508                 if (upid->ns == ns)
509                         nr = upid->nr;
510         }
511         return nr;
512 }