Process Descriptor
- A simplified task_struct
- include/linux/sched.h
struct task_struct {
//////////////////////////Process AttributeRelated Fields//////////////////////////////////////
//The state of the process. Values are TASK_RUNNING, TASK_INTERRUPTIBLE,
// TASK_UNINTERRUPTIBLE, TASK_ZOMBIE, TASK_STOPPED and TASK_DEAD.
volatile long state;
//Flags define special attributes that belong to the task. Values are
//PF_STARTING, PF_EXITING, PF_DEAD and PF_FORKNOEXEC.
unsigned long flags;
//ptrace is set when the ptrace() system call is called on the process for performance measurements.
unsigned long ptrace;
//Linux supports a number of executable formats
struct linux_binfmt *binfmt;
//the task's exit value and exit signal.
int exit_code, exit_signal;
//pdeath_signal is a signal sent upon the parent's death.
int pdeath_signal;
//Process Identifer
pid_t pid;
//command line
char comm[16];
/////////////////////////Scheduling Related Fields///////////////////////////////////////
//static_prio is equivalent to the nice value. The default value of static_prio is MAX_PRIO-20.
//prio depends on the processes' scheduling history and the specified nice value
//he prio field holds +/- 5 of the value of static_prio
int prio, static_prio;
//The run_list field points to the runqueue. A runqueue holds a list of all the processes to run.
struct list_head run_list;
//The array field points to the priority array of a runqueue
prio_array_t *array;
//The sleep_avg field is used to calculate the effective priority of the task,
//which is the average amount of clock ticks the task has spent sleeping.
unsigned long sleep_avg;
//The timestamp field is used to calculate the sleep_avg for when a task sleeps or yields.
unsigned long long timestamp;
//The interactive_credit field is used along with the sleep_avg and activated fields to calculate sleep_avg.
long interactive_credit;
//The policy determines the type of process (for example, time sharing or real time).
unsigned long policy;
//The activated field keeps track of the incrementing and decrementing of sleep averages.
int activated;
//The cpus_allowed field specifies which CPUs might handle a task
cpumask_t cpus_allowed;
//The time_slice field defines the maximum amount of time the task is allowed to run.
//The first_time_slice field is repeatedly set to 0 and keeps track of the scheduling time.
unsigned int time_slice, first_time_slice;
//rt_priority is a static value that can only be updated through schedule().
// This value is necessary to support real-time tasks.
unsigned long rt_priority;
//real_parent points to the current process' parent's description.
// It will point to the process descriptor of init()
// if the original parent of our current process has been destroyed.
struct task_struct *real_parent;
//parent is a pointer to the descriptor of the parent process.
struct task_struct *parent;
//children is the struct that points to the list of our current process' children.
struct list_head children;
//sibling is the struct that points to the list of the current process' siblings.
struct list_head sibling;
//A process can be a member of a group of processes,
//and each group has one process defined as the group leader.
struct task_struct *group_leader;
////////////////////////Process CredentialsRelated Fields////////////////////////////////////////
//The uid field holds the user ID number of the user who created the process.
//The gid field holds the group ID of the group who owns the process.
//The euid effective user ID
//The egid effective group ID
//suid (saved user ID) and sgid (saved group ID) are used in the setuid() system calls.
//The fsuid and fsgid values are checked specifically for filesystem checks.
// They generally hold the same values as uid and gid except for when a setuid() system call is made.
uid_t uid, euid, suid, fsuid;
gid_t gid, egid, sgid, fsgid;
//In Linux, a user may be part of more than one group.
//These groups may have varying permissions with respect to system and data accesses.
// For this reason, the processes need to inherit this credential.
struct group_info *group_info;
/////////////////////////////Process CapabilitiesRelated Fields/////////////////////////////////////
//cap_effective. The capabilities that can be currently used by the process.
//cap_inheritable. The capabilities that are passed through a call to execve.
//cap_permitted. The capabilities that can be made either effective or inheritable.
kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
//////////////////////////////task_struct Resource Limits////////////////////////////////////
struct rlimit rlim[RLIM_NLIMITS];
////////////////////////////Filesystem- and Address SpaceRelated Fields//////////////////////////////////////
//The fs field holds a pointer to filesystem information.
struct fs_struct *fs;
//The files field holds a pointer to the file descriptor table for the task.
struct files_struct *files;
//mm points to address-space and memory-managementrelated information.
//active_mm is a pointer to the most recently accessed address space.
struct mm_struct *mm, *active_mm;
//////////////////////////////////////////////////////////////////
pid_t tgid;
atomic_t usage;
int lock_depth;
struct thread_info *thread_info;
struct list_head tasks;
struct list_head ptrace_children;
struct list_head ptrace_list;
struct pid_link pids[PIDTYPE_MAX];
wait_queue_head_t wait_chldexit;
struct completion *vfork_done;
int __user *set_child_tid;
int __user *clear_child_tid;
unsigned long it_real_value, it_prof_value, it_virt_value;
unsigned long it_real_incr, it_prof_incr, it_virt_incr;
struct timer_list real_timer;
unsigned long utime, stime, cutime, cstime;
//Different kinds of context switches exist.
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
u64 start_time;
int keep_capabilities:1;
struct user_struct *user;
unsigned short used_math;
int link_count, total_link_count;
unsigned long ptrace_message;
siginfo_t *last_siginfo;
};
Process Creation: fork()
, vfork()
, and clone()
System Calls
/*
* Ok, this is the main fork-routine.
*
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
*/
long do_fork(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr)
{
struct task_struct *p;
int trace = 0;
long pid = alloc_pidmap();
if (pid < 0)
return -EAGAIN;
if (unlikely(current->ptrace)) {
trace = fork_traceflag (clone_flags);
if (trace)
clone_flags |= CLONE_PTRACE;
}
p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
*/
if (!IS_ERR(p)) {
struct completion vfork;
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
}
if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
/*
* We'll start up with an immediate SIGSTOP.
*/
sigaddset(&p->pending.signal, SIGSTOP);
set_tsk_thread_flag(p, TIF_SIGPENDING);
}
if (!(clone_flags & CLONE_STOPPED))
wake_up_new_task(p, clone_flags);
else
p->state = TASK_STOPPED;
if (unlikely (trace)) {
current->ptrace_message = pid;
ptrace_notify ((trace << 8) | SIGTRAP);
}
if (clone_flags & CLONE_VFORK) {
wait_for_completion(&vfork);
if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
}
} else {
free_pidmap(pid);
pid = PTR_ERR(p);
}
return pid;
}