diff -rc2P linux/Makefile linux.ctx/Makefile *** linux/Makefile Sun Oct 13 15:38:52 2002 --- linux.ctx/Makefile Fri Oct 25 17:10:43 2002 *************** *** 2,6 **** PATCHLEVEL = 4 SUBLEVEL = 18 ! EXTRAVERSION = KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) --- 2,6 ---- PATCHLEVEL = 4 SUBLEVEL = 18 ! EXTRAVERSION = -10-ctx12 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -rc2P linux/arch/i386/kernel/entry.S linux.ctx/arch/i386/kernel/entry.S *** linux/arch/i386/kernel/entry.S Sun Oct 13 15:38:37 2002 --- linux.ctx/arch/i386/kernel/entry.S Sat Oct 26 23:31:24 2002 *************** *** 650,653 **** --- 650,655 ---- .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_setaffinity */ .long SYMBOL_NAME(sys_ni_syscall) /* reserved for sched_getaffinity */ + .long SYMBOL_NAME(sys_new_s_context) /* 243 */ + .long SYMBOL_NAME(sys_set_ipv4root) /* 244 */ .rept NR_syscalls-(.-sys_call_table)/4 diff -rc2P linux/arch/i386/kernel/init_task.c linux.ctx/arch/i386/kernel/init_task.c *** linux/arch/i386/kernel/init_task.c Tue Sep 18 02:29:09 2001 --- linux.ctx/arch/i386/kernel/init_task.c Fri Nov 1 16:21:00 2002 *************** *** 2,5 **** --- 2,6 ---- #include #include + #include #include diff -rc2P linux/arch/i386/kernel/ptrace.c linux.ctx/arch/i386/kernel/ptrace.c *** linux/arch/i386/kernel/ptrace.c Sun Oct 13 15:38:39 2002 --- linux.ctx/arch/i386/kernel/ptrace.c Fri Oct 25 16:32:05 2002 *************** *** 171,175 **** get_task_struct(child); read_unlock(&tasklist_lock); ! if (!child) goto out; --- 171,175 ---- get_task_struct(child); read_unlock(&tasklist_lock); ! 
if (!child || child->s_context != current->s_context) goto out; diff -rc2P linux/fs/devpts/inode.c linux.ctx/fs/devpts/inode.c *** linux/fs/devpts/inode.c Thu Oct 25 11:02:26 2001 --- linux.ctx/fs/devpts/inode.c Fri Nov 1 13:35:28 2002 *************** *** 25,28 **** --- 25,30 ---- #include #include + #include + #include #include "devpts_i.h" *************** *** 153,157 **** inode->i_fop = &devpts_root_operations; inode->i_nlink = 2; - s->u.generic_sbp = (void *) sbi; s->s_blocksize = 1024; --- 155,158 ---- *************** *** 181,184 **** --- 182,198 ---- static DECLARE_FSTYPE(devpts_fs_type, "devpts", devpts_read_super, FS_SINGLE); + static int devpts_tty_permission(struct inode *inode, int mask) + { + int ret = -EACCES; + if (current->s_context->id == inode->u.devpts_i.s_context){ + ret = vfs_permission(inode, mask); + } + return ret; + } + + struct inode_operations devpts_tty_inode_operations = { + permission: devpts_tty_permission, + }; + void devpts_pty_new(int number, kdev_t device) { *************** *** 199,202 **** --- 213,218 ---- inode->i_gid = sbi->setgid ? sbi->gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->u.devpts_i.s_context = current->s_context->id; + inode->i_op = &devpts_tty_inode_operations; init_special_inode(inode, S_IFCHR|sbi->mode, kdev_t_to_nr(device)); diff -rc2P linux/fs/devpts/root.c linux.ctx/fs/devpts/root.c *** linux/fs/devpts/root.c Fri Dec 21 20:41:55 2001 --- linux.ctx/fs/devpts/root.c Fri Nov 1 13:33:41 2002 *************** *** 15,18 **** --- 15,20 ---- #include #include + #include + #include #include "devpts_i.h" *************** *** 65,69 **** while ( nr - 2 < sbi->max_ptys ) { int ptynr = nr - 2; ! if ( sbi->inodes[ptynr] ) { genptsname(numbuf, ptynr); if ( filldir(dirent, numbuf, strlen(numbuf), nr, nr, DT_CHR) < 0 ) --- 67,74 ---- while ( nr - 2 < sbi->max_ptys ) { int ptynr = nr - 2; ! struct inode *inode = sbi->inodes[ptynr]; ! if ( inode != NULL ! 
&& (current->s_context->id == 1 ! || inode->u.devpts_i.s_context == current->s_context->id)) { genptsname(numbuf, ptynr); if ( filldir(dirent, numbuf, strlen(numbuf), nr, nr, DT_CHR) < 0 ) *************** *** 101,104 **** --- 106,110 ---- int i; const char *p; + struct inode *inode; dentry->d_op = &devpts_dentry_operations; *************** *** 127,135 **** return NULL; ! if ( sbi->inodes[entry] ) ! atomic_inc(&sbi->inodes[entry]->i_count); ! d_add(dentry, sbi->inodes[entry]); return NULL; } --- 133,148 ---- return NULL; ! inode = sbi->inodes[entry]; ! if (inode != NULL ! && inode->u.devpts_i.s_context == current->s_context->id){ ! atomic_inc(&inode->i_count); ! }else{ ! inode = NULL; ! } ! d_add(dentry, inode); return NULL; } + + diff -rc2P linux/fs/exec.c linux.ctx/fs/exec.c *** linux/fs/exec.c Sun Oct 13 15:38:37 2002 --- linux.ctx/fs/exec.c Fri Nov 1 14:10:37 2002 *************** *** 37,40 **** --- 37,41 ---- #include #include + #include #define __NO_VERSION__ #include *************** *** 693,697 **** int do_unlock = 0; ! new_permitted = cap_intersect(bprm->cap_permitted, cap_bset); working = cap_intersect(bprm->cap_inheritable, current->cap_inheritable); --- 694,698 ---- int do_unlock = 0; ! new_permitted = cap_intersect(bprm->cap_permitted, current->s_context->cap_bset); working = cap_intersect(bprm->cap_inheritable, current->cap_inheritable); diff -rc2P linux/fs/proc/array.c linux.ctx/fs/proc/array.c *** linux/fs/proc/array.c Sun Oct 13 15:38:38 2002 --- linux.ctx/fs/proc/array.c Tue Nov 5 10:15:47 2002 *************** *** 71,74 **** --- 71,75 ---- #include #include + #include #include *************** *** 76,79 **** --- 77,81 ---- #include #include + #include /* Gcc optimizes away "strlen(x)" for constant x */ *************** *** 148,153 **** { int g; ! read_lock(&tasklist_lock); buffer += sprintf(buffer, "State:\t%s\n" --- 150,159 ---- { int g; ! 
pid_t ppid; read_lock(&tasklist_lock); + ppid = p->p_opptr->pid; + if (ppid != 0 + && current->s_context != NULL + && current->s_context->initpid == ppid) ppid = 1; buffer += sprintf(buffer, "State:\t%s\n" *************** *** 159,163 **** "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), p->tgid, ! p->pid, p->pid ? p->p_opptr->pid : 0, 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); --- 165,169 ---- "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), p->tgid, ! p->pid, p->pid ? ppid : 0, 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); *************** *** 292,295 **** --- 298,344 ---- buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); + #ifdef __NR_new_s_context + if (task->s_context != NULL){ + int i; + buffer += sprintf (buffer,"s_context: %d ",task->s_context->id); + *buffer++ = '\n'; + }else{ + buffer += sprintf (buffer,"s_context: 0\n"); + } + buffer += sprintf (buffer,"__NR_new_s_context: %d\n",__NR_new_s_context); + buffer += sprintf (buffer,"__NR_set_ipv4root: %d rev2\n",__NR_set_ipv4root); + + #endif #if defined(CONFIG_ARCH_S390) buffer = task_show_regs(task, buffer); *************** *** 344,347 **** --- 393,398 ---- read_lock(&tasklist_lock); ppid = task->pid ? 
task->p_opptr->pid : 0; + if (current->s_context != NULL + && current->s_context->initpid == ppid) ppid = 1; read_unlock(&tasklist_lock); res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ diff -rc2P linux/fs/proc/base.c linux.ctx/fs/proc/base.c *** linux/fs/proc/base.c Sun Oct 13 15:38:35 2002 --- linux.ctx/fs/proc/base.c Fri Nov 1 13:37:18 2002 *************** *** 26,29 **** --- 26,30 ---- #include #include + #include /* *************** *** 1020,1023 **** --- 1021,1030 ---- goto out; + if (pid != 1 + && current->s_context->id != 1 + && task->s_context->id != current->s_context->id){ + free_task_struct(task); + goto out; + } inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO); *************** *** 1030,1034 **** inode->i_fop = &proc_base_operations; inode->i_nlink = 3; ! inode->i_flags|=S_IMMUTABLE; dentry->d_op = &pid_base_dentry_operations; --- 1037,1041 ---- inode->i_fop = &proc_base_operations; inode->i_nlink = 3; ! inode->i_flags|=S_IMMUTABLE; dentry->d_op = &pid_base_dentry_operations; *************** *** 1066,1069 **** --- 1073,1089 ---- if (!pid) continue; + /* Even if the pid 1 is not part of the security context */ + /* we show it anyway. 
This makes the security box */ + /* more standard (and helps pstree do its job) */ + /* So the current process "knows" pid 1 exists anyway, but it can't */ + /* send it any signal */ + + /* A process with security context 1 can see all processes */ + if (pid != 1 + && current->s_context->id != 1 + && p->s_context->id != current->s_context->id) continue; + /* We hide the fakeinit process since we show it as process 1 */ + if (current->s_context != NULL + && current->s_context->initpid == pid) continue; if (--index >= 0) continue; diff -rc2P linux/fs/proc/root.c linux.ctx/fs/proc/root.c *** linux/fs/proc/root.c Sun Oct 13 15:38:35 2002 --- linux.ctx/fs/proc/root.c Fri Nov 1 13:24:45 2002 *************** *** 16,19 **** --- 16,20 ---- #include #include + #include #include *************** *** 69,72 **** --- 70,75 ---- #endif proc_bus = proc_mkdir("bus", 0); + + context_procfs_init(); } diff -rc2P linux/include/asm-i386/unistd.h linux.ctx/include/asm-i386/unistd.h *** linux/include/asm-i386/unistd.h Sun Oct 13 15:38:35 2002 --- linux.ctx/include/asm-i386/unistd.h Sat Oct 26 23:45:00 2002 *************** *** 248,251 **** --- 248,254 ---- #define __NR_sched_setaffinity 241 #define __NR_sched_getaffinity 242 + #define __NR_new_s_context 243 + #define __NR_set_ipv4root 244 + /* user-visible error numbers are in the range -1 - -124: see */ diff -rc2P linux/include/linux/capability.h linux.ctx/include/linux/capability.h *** linux/include/linux/capability.h Wed Oct 30 09:48:14 2002 --- linux.ctx/include/linux/capability.h Thu Nov 7 12:52:44 2002 *************** *** 232,235 **** --- 232,236 ---- arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ + /* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 *************** *** 280,283 **** --- 281,288 ---- #define CAP_LEASE 28 + /* Allow opening special device files */ + + #define CAP_OPENDEV 29 + #ifdef __KERNEL__ /* diff -rc2P linux/include/linux/devpts_fs_info.h 
linux.ctx/include/linux/devpts_fs_info.h *** linux/include/linux/devpts_fs_info.h Thu Jan 1 03:00:00 1970 --- linux.ctx/include/linux/devpts_fs_info.h Fri Oct 25 16:32:05 2002 *************** *** 0 **** --- 1,4 ---- + struct devpts_inode_info{ + int s_context; + }; + diff -rc2P linux/include/linux/fs.h linux.ctx/include/linux/fs.h *** linux/include/linux/fs.h Wed Oct 30 09:48:14 2002 --- linux.ctx/include/linux/fs.h Thu Nov 7 12:52:44 2002 *************** *** 320,323 **** --- 320,324 ---- #include #include + #include /* *************** *** 512,515 **** --- 513,517 ---- struct usbdev_inode_info usbdev_i; struct jffs2_inode_info jffs2_i; + struct devpts_inode_info devpts_i; void *generic_ip; } u; diff -rc2P linux/include/linux/inetdevice.h linux.ctx/include/linux/inetdevice.h *** linux/include/linux/inetdevice.h Sun Oct 13 15:38:35 2002 --- linux.ctx/include/linux/inetdevice.h Tue Nov 5 10:46:22 2002 *************** *** 73,76 **** --- 73,77 ---- unsigned char ifa_prefixlen; char ifa_label[IFNAMSIZ]; + unsigned int s_context; }; diff -rc2P linux/include/linux/s_context.h linux.ctx/include/linux/s_context.h *** linux/include/linux/s_context.h Thu Jan 1 03:00:00 1970 --- linux.ctx/include/linux/s_context.h Sat Nov 9 15:11:10 2002 *************** *** 0 **** --- 1,121 ---- + #ifndef _S_CONTEXT_H_ + + #define _S_CONTEXT_H_ + + #include + #include + #include + + /* + We may have a different domainname and nodename for each security + context. 
By default, a security context shares the same names as its + parent, potentially the information in system_utsname + */ + #define S_CTX_INFO_LOCK 1 /* Can't request a new s_context */ + #define S_CTX_INFO_SCHED 2 /* All processes in the s_context */ + /* contribute to the scheduler */ + #define S_CTX_INFO_NPROC 4 /* Limit number of processes in a context */ + #define S_CTX_INFO_PRIVATE 8 /* No one can join this security context */ + #define S_CTX_INFO_INIT 16 /* This process wants to become the */ + /* logical process 1 of the security */ + /* context */ + #define NB_IPV4ROOT 16 + #define NB_S_CONTEXT 16 + #define VSERVER_VERSION "ctx-12" + #define MAX_S_CONTEXT 65535 /* Arbitrary limit */ + #define MAX_S_CONTEXT_SLEEP HZ/10 + /* + #define S_CONTEXT_DEBUG 1 + #define S_CONTEXT_DEBUG_LIST 1 + */ + struct task_box + { + struct task_box *next; + struct task_struct *task; + }; + + struct s_context + { + struct s_context *prev; + struct s_context *next; + /****************************/ + int id; + /* root is allowed to switch the current */ + /* security context using any in this table */ + char nodename[sizeof(system_utsname.nodename)]; + char domainname[sizeof(system_utsname.domainname)]; + + int flags; /* S_CTX_INFO_xxx */ + __u32 cap_bset; /* Maximum capability of this context and children */ + int nbipv4; + __u32 ipv4[NB_IPV4ROOT]; /* Process can only bind to these IPs */ + /* The first one is used to connect */ + /* and to bind any service */ + /* The others must be used explicitly when */ + /* binding */ + __u32 v4_bcast; /* Broadcast address used to receive UDP packets */ + int initpid; /* PID of the logical process 1 of the context */ + unsigned int PROC_RLIMIT; /* process limit of the context */ + atomic_t sleeptime; + + /* statistics */ + atomic_t ticks; /* Number of ticks used by all processes */ + /* in the s_context */ + + rwlock_t tasks_lock; + unsigned int task_count; + struct task_box *tasklist; + + rwlock_t socket_lock; + unsigned int socket_count; + }; + + extern struct 
s_context root_context; + extern rwlock_t s_context_lock; + + #define INIT_S_CONTEXT \ + { \ + prev: NULL,\ + next: NULL,\ + id: 0,\ + nodename: {0},\ + domainname: {0},\ + flags: 0,\ + cap_bset: CAP_INIT_EFF_SET,\ + nbipv4: 0,\ + ipv4: {0},\ + v4_bcast: ~0,\ + initpid: 1,\ + PROC_RLIMIT: 0,\ + sleeptime: {0},\ + ticks: {0},\ + tasks_lock: RW_LOCK_UNLOCKED, \ + task_count: 0,\ + tasklist: NULL,\ + socket_lock: RW_LOCK_UNLOCKED,\ + socket_count: 0 \ + } + + #define for_each_s_context(p) \ + for(p = &root_context; (p=p->next) != &root_context; ) + + #define check_s_context_proc_limit(p) \ + (p != NULL) && ( (p->flags & S_CTX_INFO_NPROC)!=0 ) \ + && ( p->task_count >=p->PROC_RLIMIT ) + + #define for_each_task_in_context(ctx,task) \ + for( task = ctx->tasklist; task != NULL; task = task -> next ) + + + /* syscall */ + int sys_new_s_context(int ctx, __u32 remove_cap, int flags); + int sys_set_ipv4root (__u32 ip[], int nbip, __u32 bcast); + /***********************/ + /* new api*/ + void context_procfs_init(void); + void s_context_init(void); + /* work with task list locked */ + void s_context_addtask(unsigned int context,struct task_struct *task); + void s_context_deltask(struct task_struct *task); + + #endif diff -rc2P linux/include/linux/sched.h linux.ctx/include/linux/sched.h *** linux/include/linux/sched.h Wed Oct 30 09:48:14 2002 --- linux.ctx/include/linux/sched.h Sat Nov 9 13:25:28 2002 *************** *** 130,133 **** --- 130,135 ---- #include + struct s_context; + /* * This serializes "schedule()" and also protects *************** *** 276,279 **** --- 278,282 ---- struct user_struct *next, **pprev; uid_t uid; + int s_context; }; *************** *** 283,286 **** --- 286,290 ---- __user; }) + extern struct user_struct root_user; #define INIT_USER (&root_user) *************** *** 400,403 **** --- 404,408 ---- size_t sas_ss_size; int (*notifier)(void *priv); + void *notifier_data; sigset_t *notifier_mask; *************** *** 407,411 **** void (*tux_exit)(void); - 
/* Thread group tracking */ u32 parent_exec_id; --- 412,415 ---- *************** *** 416,419 **** --- 420,425 ---- /* journalling filesystem info */ void *journal_info; + /* secure context info */ + struct s_context *s_context; }; *************** *** 515,518 **** --- 521,525 ---- alloc_lock: SPIN_LOCK_UNLOCKED, \ journal_info: NULL, \ + s_context: &root_context \ } *************** *** 566,570 **** /* per-UID process charging. */ ! extern struct user_struct * alloc_uid(uid_t); extern void free_uid(struct user_struct *); --- 573,577 ---- /* per-UID process charging. */ ! extern struct user_struct * alloc_uid(int, uid_t); extern void free_uid(struct user_struct *); Binary files linux/include/linux/tcedit.dst and linux.ctx/include/linux/tcedit.dst differ diff -rc2P linux/include/net/route.h linux.ctx/include/net/route.h *** linux/include/net/route.h Wed Oct 30 09:48:15 2002 --- linux.ctx/include/net/route.h Sun Nov 10 05:31:01 2002 *************** *** 33,36 **** --- 33,37 ---- #include #include + #include #ifndef __KERNEL__ *************** *** 165,168 **** --- 166,183 ---- { int err; + if (current->s_context != NULL){ + __u32 ipv4root = current->s_context->ipv4[0]; + if (ipv4root != 0){ + if (src == 0){ + src = dst == 0x0100007f + ? 0x0100007f: ipv4root; + }else if (ipv4root != src){ + return -EPERM; + } + if (dst == 0x0100007f && current->s_context != NULL){ + dst = ipv4root; + } + } + } err = ip_route_output(rp, dst, src, tos, oif); if (err || (dst && src)) diff -rc2P linux/include/net/sock.h linux.ctx/include/net/sock.h *** linux/include/net/sock.h Wed Oct 30 09:48:15 2002 --- linux.ctx/include/net/sock.h Sun Nov 10 05:31:02 2002 *************** *** 526,529 **** --- 526,530 ---- unsigned int allocation; /* Allocation mode */ int sndbuf; /* Size of send buffer in bytes */ + __u32 bcast_addr; /* Local bcast addr, for ipv4root */ struct sock *prev; *************** *** 669,673 **** /* RPC and TUX layer private data */ void *user_data; ! 
/* Callbacks */ void (*state_change)(struct sock *sk); --- 670,677 ---- /* RPC and TUX layer private data */ void *user_data; ! ! /* Context of process creating this socket */ ! int s_context; ! /* Callbacks */ void (*state_change)(struct sock *sk); diff -rc2P linux/include/net/tcp.h linux.ctx/include/net/tcp.h *** linux/include/net/tcp.h Wed Oct 30 09:48:15 2002 --- linux.ctx/include/net/tcp.h Sun Nov 10 05:31:02 2002 *************** *** 191,194 **** --- 191,195 ---- struct in6_addr v6_rcv_saddr; #endif + int s_context; }; diff -rc2P linux/init/main.c linux.ctx/init/main.c *** linux/init/main.c Sun Oct 13 15:38:36 2002 --- linux.ctx/init/main.c Sun Nov 3 08:39:44 2002 *************** *** 74,77 **** --- 74,79 ---- #endif + #include + /* * Versions of gcc older than that listed below may actually compile *************** *** 356,361 **** trap_init(); init_IRQ(); ! sched_init(); ! softirq_init(); time_init(); --- 358,364 ---- trap_init(); init_IRQ(); ! s_context_init(); ! sched_init(); ! softirq_init(); time_init(); *************** *** 546,549 **** --- 549,553 ---- init_pcmcia_ds(); /* Do this last */ #endif + } diff -rc2P linux/ipc/util.c linux.ctx/ipc/util.c *** linux/ipc/util.c Mon Aug 13 04:37:53 2001 --- linux.ctx/ipc/util.c Fri Nov 1 16:15:37 2002 *************** *** 20,23 **** --- 20,24 ---- #include #include + #include #if defined(CONFIG_SYSVIPC) *************** *** 94,97 **** --- 95,99 ---- for (id = 0; id <= ids->max_id; id++) { + if (ids->entries[id].s_context != current->s_context->id) continue; p = ids->entries[id].p; if(p==NULL) *************** *** 168,171 **** --- 170,174 ---- spin_lock(&ids->ary); ids->entries[id].p = new; + ids->entries[id].s_context = current->s_context->id; return id; } diff -rc2P linux/ipc/util.h linux.ctx/ipc/util.h *** linux/ipc/util.h Mon Feb 19 21:18:18 2001 --- linux.ctx/ipc/util.h Sun Nov 10 05:31:02 2002 *************** *** 6,12 **** --- 6,15 ---- */ + #include + #define USHRT_MAX 0xffff #define SEQ_MULTIPLIER (IPCMNI) + 
void sem_init (void); void msg_init (void); *************** *** 26,29 **** --- 29,33 ---- struct ipc_id { struct kern_ipc_perm* p; + int s_context; // Context owning this ID }; *************** *** 75,80 **** spin_lock(&ids->ary); out = ids->entries[lid].p; ! if(out==NULL) spin_unlock(&ids->ary); return out; } --- 79,88 ---- spin_lock(&ids->ary); out = ids->entries[lid].p; ! if(out==NULL ! || (ids->entries[lid].s_context != current->s_context->id ! && current->s_context->id != 1)){ spin_unlock(&ids->ary); + out = NULL; + } return out; } diff -rc2P linux/kernel/Makefile linux.ctx/kernel/Makefile *** linux/kernel/Makefile Sun Oct 13 15:38:49 2002 --- linux.ctx/kernel/Makefile Sun Oct 27 14:43:21 2002 *************** *** 11,20 **** export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o \ ! syscall_ksyms.o obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \ module.o exit.o itimer.o info.o time.o softirq.o resource.o \ sysctl.o acct.o capability.o ptrace.o timer.o user.o \ ! signal.o sys.o kmod.o context.o kksymoops.o syscall_ksyms.o obj-$(CONFIG_UID16) += uid16.o --- 11,21 ---- export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o \ ! syscall_ksyms.o s_context.o obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \ module.o exit.o itimer.o info.o time.o softirq.o resource.o \ sysctl.o acct.o capability.o ptrace.o timer.o user.o \ ! signal.o sys.o kmod.o context.o kksymoops.o syscall_ksyms.o \ ! s_context.o obj-$(CONFIG_UID16) += uid16.o diff -rc2P linux/kernel/exit.c linux.ctx/kernel/exit.c *** linux/kernel/exit.c Sun Oct 13 15:38:38 2002 --- linux.ctx/kernel/exit.c Fri Nov 1 10:32:02 2002 *************** *** 17,20 **** --- 17,21 ---- #include #endif + #include #include *************** *** 42,47 **** current->cmaj_flt += p->maj_flt + p->cmaj_flt; current->cnswap += p->nswap + p->cnswap; ! sched_exit(p); ! 
p->pid = 0; free_task_struct(p); } --- 43,49 ---- current->cmaj_flt += p->maj_flt + p->cmaj_flt; current->cnswap += p->nswap + p->cnswap; ! sched_exit(p); ! s_context_deltask(p); ! p->pid = 0; free_task_struct(p); } *************** *** 206,211 **** --- 208,226 ---- { struct task_struct * p, *reaper; + struct task_struct *vchild_reaper = child_reaper; read_lock(&tasklist_lock); + /* if fake init find init task this context */ + if (father->s_context != NULL) + { + pid_t initpid = father->s_context->initpid; + if (initpid != 0 + && father->pid != initpid) + { + struct task_struct *r = find_task_by_pid(initpid); + if (r != NULL) vchild_reaper = r; + } + } + /* Next in our thread group, if they're not already exiting */ *************** *** 218,222 **** if (reaper == father) ! reaper = child_reaper; for_each_task(p) { --- 233,237 ---- if (reaper == father) ! reaper = vchild_reaper; for_each_task(p) { *************** *** 228,232 **** /* Make sure we're not reparenting to ourselves */ if (p == reaper) ! p->p_opptr = child_reaper; else p->p_opptr = reaper; --- 243,247 ---- /* Make sure we're not reparenting to ourselves */ if (p == reaper) ! p->p_opptr = vchild_reaper; else p->p_opptr = reaper; diff -rc2P linux/kernel/fork.c linux.ctx/kernel/fork.c *** linux/kernel/fork.c Sun Oct 13 15:38:37 2002 --- linux.ctx/kernel/fork.c Mon Nov 4 08:03:36 2002 *************** *** 11,15 **** * management can be a bitch. 
See 'mm/memory.c': 'copy_page_range()' */ - #include #include --- 11,14 ---- *************** *** 18,26 **** #include #include - #include #include #include #include #include #include --- 17,25 ---- #include #include #include #include #include #include + #include #include *************** *** 615,618 **** --- 614,624 ---- retval = -EAGAIN; + // printk("fork: current->s_context = %x \n", current->s_context); + // printk("fork: p->s_context = %x \n", p->s_context); + if( check_s_context_proc_limit(p->s_context) ) + { + goto bad_fork_free; + } + /* * Check if we are over our maximum process limit, but be sure to *************** *** 768,771 **** --- 774,781 ---- hash_pid(p); nr_threads++; + if(current->s_context) + { + s_context_addtask(current->s_context->id,p); + } write_unlock_irq(&tasklist_lock); diff -rc2P linux/kernel/printk.c linux.ctx/kernel/printk.c *** linux/kernel/printk.c Sun Oct 13 15:38:25 2002 --- linux.ctx/kernel/printk.c Fri Oct 25 16:32:05 2002 *************** *** 174,177 **** --- 174,179 ---- int error = 0; + if (!capable(CAP_SYS_ADMIN) && current->s_context != 0) return -EPERM; + switch (type) { case 0: /* Close log */ diff -rc2P linux/kernel/s_context.c linux.ctx/kernel/s_context.c *** linux/kernel/s_context.c Thu Jan 1 03:00:00 1970 --- linux.ctx/kernel/s_context.c Sun Nov 10 08:48:29 2002 *************** *** 0 **** --- 1,666 ---- + #include + #include + #include + #include + #include + #include + + #include + /* + #define S_CONTEXT_DEBUG 1 + #define S_CONTEXT_DEBUG_LIST 1 + */ + /***********************************************************/ + rwlock_t s_context_lock = RW_LOCK_UNLOCKED; /* context locker */ + unsigned int context_count = 1; + struct s_context root_context = INIT_S_CONTEXT; + /* struct s_context context_1 = INIT_S_CONTEXT; */ + + /************** procfs start *********************************/ + + static struct proc_dir_entry *vserver_root = NULL; + + static int read_setup_vserver(char *page, char **start, off_t off, + int count, int 
*eof, void *data) + { + size_t len = 0; + + len = sprintf(page , "vserver patch %s\n", VSERVER_VERSION); + len += sprintf(page+len, "new context syscal %d\n", __NR_new_s_context); + len += sprintf(page+len, "ipv4root syscal %d\n", __NR_set_ipv4root); + + read_lock(s_context_lock); + len += sprintf(page+len, "contexts count %d\n",context_count); + read_unlock(s_context_lock); + + *start = page + off; + if( off + count >= len ) + { + *eof = 1; + } + else + { + *eof = 0; + } + + len -= off; + if (len > count) + { + len = count; + } + if (len < 0) + { + len = 0; + } + + return len; + } + + static int read_ctx_setup(char *page, char **start, off_t off, + int count, int *eof, void *data) + { + size_t len = 0; + unsigned int i = 0; + struct s_context *ctx = data; + struct task_box *tbox = NULL; + + + read_lock(s_context_lock); + if( ctx == NULL ) + { + /* self */ + ctx = current -> s_context; + } + + if( ( current -> s_context == NULL ) + || ( ctx -> id != current -> s_context -> id ) ) + { + read_unlock(s_context_lock); + return -EPERM; + } + + len = sprintf(page , "vserver id %d\n", ctx -> id); + if( ctx -> nodename[0] != 0 ) + { + len += sprintf(page+len, "nodename %s\n", ctx -> nodename); + } + if( ctx -> domainname[0] != 0 ) + { + len += sprintf(page+len, "domainname %s\n", ctx -> domainname); + } + + len += sprintf(page+len, "flags: %x - ", ctx -> flags); + if( (ctx -> flags & S_CTX_INFO_LOCK) != 0 ) + { + strcat(page+len,"LOCK "); + len += sizeof("LOCK ")-1; + } + if( (ctx -> flags & S_CTX_INFO_SCHED) != 0 ) + { + strcat(page+len,"SCHED "); + len += sizeof("SCHED ")-1; + } + if( (ctx -> flags & S_CTX_INFO_NPROC) != 0 ) + { + strcat(page+len,"NPROC "); + len += sizeof("NPROC ")-1; + } + if( (ctx -> flags & S_CTX_INFO_PRIVATE) != 0 ) + { + strcat(page+len,"PRIVATE "); + len += sizeof("PRIVATE ")-1; + } + *(page+len)='\n'; + len ++; + len += sprintf(page+len, "init pid: %d\n", ctx -> initpid); + len += sprintf(page+len, "CapBset: %016x\n", cap_t(ctx -> cap_bset) ); + 
len += sprintf(page+len, "Process Limit: %d\n", ctx -> PROC_RLIMIT ); + len += sprintf(page+len, "tasks: "); + + for_each_task_in_context(ctx,tbox) + { + len += sprintf(page+len, "%d ", tbox->task->pid); + } + + *(page+len)='\n'; len ++; + + read_unlock(s_context_lock); + + *start = page + off; + if( off + count >= len ) + { + *eof = 1; + } + else + { + *eof = 0; + } + + len -= off; + if (len > count) + { + len = count; + } + if (len < 0) + { + len = 0; + } + + return len; + } + + + void context_procfs_init() + { + vserver_root = proc_mkdir("vservers",0); + + if( vserver_root == NULL ) + { + return; + } + + create_proc_read_entry("setup", 0, vserver_root, &read_setup_vserver, NULL ); + create_proc_read_entry("0", 0, vserver_root, &read_ctx_setup, &root_context ); + create_proc_read_entry("self", 0, vserver_root, &read_ctx_setup, NULL ); + } + + + /************** procfs end *********************************/ + + + /************** s_context start *********************************/ + + /* NEED LOCK s_context_lock */ + inline struct s_context* find_s_context_by_num(unsigned int context) + { + struct s_context* ctx = NULL; + + #ifdef S_CONTEXT_DEBUG_LIST + printk("try find context %d \n", context); + #endif + + if( context >= MAX_S_CONTEXT ) + { + return NULL; + } + + for_each_s_context(ctx) + { + #ifdef S_CONTEXT_DEBUG_LIST + printk("try find in list %d - %d \n", ctx->id, context); + #endif + if( ctx->id == context ) + { + break; + } + } + + #ifdef S_CONTEXT_DEBUG_LIST + printk("found in list %d - %d \n", ctx->id, context); + #endif + + return( (ctx->id == context) ? ctx : NULL ); + } + + void add_s_context_to_list(struct s_context* new_ctx) + { + struct s_context* ctx; + char str_id[7]; + + if( new_ctx == NULL ) + { + return; + } + #ifdef S_CONTEXT_DEBUG_LIST + printk("add s_context to list. 
ctx id %d\n", new_ctx -> id ); + #endif + + write_lock( s_context_lock ); + ctx = find_s_context_by_num(new_ctx->id); + if( ctx != NULL ) + { + write_unlock( s_context_lock ); + printk("s_context error: ctx %d already in list\n", new_ctx -> id); + vfree( new_ctx ); + return; + } + + ctx = root_context.prev; + ctx -> next = new_ctx; + new_ctx -> next = & root_context; + new_ctx -> prev = ctx; + root_context.prev = new_ctx; + context_count ++; + write_unlock( s_context_lock ); + + if( vserver_root != NULL ) + { + snprintf(str_id, sizeof( str_id ) - 1, "%d", new_ctx->id); + create_proc_read_entry(str_id, 0, vserver_root, &read_ctx_setup, new_ctx ); + } + } + + + void delete_s_context_from_list(unsigned int s_ctx_id) + { + struct s_context* ctx = NULL; + char str_id[7]; + + #ifdef S_CONTEXT_DEBUG_LIST + printk("delete s_context from list. ctx id %d\n", s_ctx_id ); + #endif + + if( s_ctx_id >= MAX_S_CONTEXT ) + { + return; + } + + write_lock( s_context_lock ); + ctx = find_s_context_by_num( s_ctx_id ); + if( ctx == NULL ) + { + write_unlock( s_context_lock ); + printk("s_context error: ctx %d not in list\n", s_ctx_id); + return; + } + ctx -> prev -> next = ctx -> next; + ctx -> next -> prev = ctx -> prev; + context_count --; + write_unlock( s_context_lock ); + + if( vserver_root != NULL ) + { + snprintf(str_id, sizeof( str_id ) - 1, "%d", s_ctx_id); + remove_proc_entry(str_id, vserver_root); + } + } + + void s_context_addtask(unsigned int context,struct task_struct *task) + { + struct s_context* ctx = NULL; + struct task_box *ctx_tasks = NULL; + struct task_box *addtask = NULL; + #ifdef S_CONTEXT_DEBUG + printk("s_context_addtask - ctx %d\n", context ); + #endif + + read_lock(s_context_lock); + + ctx = find_s_context_by_num(context); + if( ctx == NULL ) + { + #ifdef S_CONTEXT_DEBUG + printk("s_context_addtask - not found context !\n"); + #endif + return; + } + + addtask = vmalloc( sizeof(struct task_box)); + if( addtask == NULL ) + { + #ifdef S_CONTEXT_DEBUG + 
printk("s_context_addtask - not have memory for add task !\n"); + #endif + read_unlock( s_context_lock ); + return; + } + + write_lock(ctx->tasks_lock); + #ifdef S_CONTEXT_DEBUG_LIST + printk("ctx %x\n task %x addtask %x\n", ctx, task, addtask ); + #endif + + task -> s_context = ctx; + ctx -> task_count ++; + ctx_tasks = ctx -> tasklist; + addtask -> task = task; + addtask -> next = ctx -> tasklist; + ctx -> tasklist = addtask; + + write_unlock(ctx->tasks_lock); + read_unlock(s_context_lock); + return; + } + + void s_context_deltask(struct task_struct *task) + { + struct s_context *ctx; + struct task_box *tbox; + struct task_box *tbox_prev; + int delete = 0; + unsigned int id = 0; + + #ifdef S_CONTEXT_DEBUG + printk("s_context_deltask\n"); + #endif + + read_lock(s_context_lock); + ctx = task->s_context; + if( ctx == NULL ) + { + return; + } + #ifdef S_CONTEXT_DEBUG + printk("task %x ctx %x\n", task, ctx); + #endif + + write_lock( ctx -> tasks_lock); + #ifdef S_CONTEXT_DEBUG + printk("try find task %x\n", ctx->tasklist) ; + #endif + + tbox_prev = NULL; + for_each_task_in_context(ctx,tbox) + { + #ifdef S_CONTEXT_DEBUG_LIST + printk("%x - %x\n", tbox->task, task ); + #endif + if( tbox->task == task ) + { + break; + } + tbox_prev = tbox; + } + + if( tbox != NULL ) + { + #ifdef S_CONTEXT_DEBUG + printk("found. 
delete it \n") ;
+ #endif
+ 		if( tbox_prev != NULL )
+ 		{
+ 			tbox_prev -> next = tbox -> next;
+ 		}
+ 		else
+ 		{
+ 			ctx -> tasklist = tbox -> next;
+ 		}
+ 		vfree(tbox);
+ 	}
+ 	ctx -> task_count --;
+ 	delete = (ctx -> task_count == 0);
+ 	id = ctx->id;
+ 	task -> s_context = NULL;
+ 	write_unlock(ctx -> tasks_lock);
+ 	read_unlock(s_context_lock);
+ 
+ 	if( delete != 0 )
+ 	{
+ #ifdef S_CONTEXT_DEBUG
+ 		printk("delete empty context \n") ;
+ #endif
+ 		delete_s_context_from_list( id );
+ 		vfree(ctx);
+ 	}
+ 
+ 	return;
+ }
+ 
+ /*
+  * Initialise the security-context bookkeeping: reset the global context
+  * lock and counter, and set up root_context (id 0, full capability
+  * bounding set) as a one-element circular list pointing at itself.
+  */
+ void s_context_init(void)
+ {
+ #ifdef S_CONTEXT_DEBUG
+ 	printk("s_context init\n");
+ #endif
+ 
+ 	s_context_lock = RW_LOCK_UNLOCKED;
+ 	context_count = 1;
+ 
+ 	memset(&root_context,0,sizeof(struct s_context));
+ 	root_context.next = &root_context;
+ 	root_context.prev = &root_context;
+ 	root_context.id = 0;
+ 	root_context.cap_bset = CAP_FULL_SET;
+ 	/*
+ 	context_1.id = 1;
+ 	add_s_context_to_list( &context_1 );
+ 	*/
+ #ifdef S_CONTEXT_DEBUG
+ 	printk("s_context init\n");
+ #endif
+ }
+ 
+ /************** s_context end *********************************/
+ 
+ /*
+  * If S_CTX_INFO_INIT is set in flags, record the calling process as the
+  * "init" process of its security context (initpid = current->tgid).
+  *
+  * Returns 0 on success or when the flag is not set, -EINVAL if the
+  * caller has no security context, -EPERM if the context already has an
+  * initpid.
+  */
+ static int set_initpid (int flags)
+ {
+ 	struct s_context *ctx = current->s_context;
+ 
+ 	if ((flags & S_CTX_INFO_INIT) == 0)
+ 		return 0;
+ 	if (ctx == NULL)
+ 		return -EINVAL;
+ 	if (ctx->initpid != 0)
+ 		return -EPERM;
+ 	ctx->initpid = current->tgid;
+ 	return 0;
+ }
+ 
+ /*
+  * Move the current task onto the user_struct keyed by
+  * (new_context, current->uid), transferring its ->processes count from
+  * the old user_struct to the new one.
+  *
+  * Returns 0 on success, -ENOMEM if alloc_uid() fails.
+  */
+ static inline int switch_user_struct(int new_context)
+ {
+ 	struct user_struct *new_user;
+ 
+ 	new_user = alloc_uid(new_context, current->uid);
+ 	if (!new_user)
+ 		return -ENOMEM;
+ 
+ 	if (new_user != current->user) {
+ 		struct user_struct *old_user = current->user;
+ 
+ 		atomic_inc(&new_user->processes);
+ 		atomic_dec(&old_user->processes);
+ 		current->user = new_user;
+ 		free_uid(old_user);
+ 	} else {
+ 		/*
+ 		 * alloc_uid() took a reference on the user_struct we
+ 		 * already hold; drop it again so it is not leaked.
+ 		 */
+ 		free_uid(new_user);
+ 	}
+ 	return 0;
+ }
+ 
+ /*
+  * Nonzero when the caller sits in a non-root context (id != 0) that has
+  * S_CTX_INFO_LOCK set, i.e. is not allowed to move to another context.
+  */
+ static int sctx_current_is_locked(void)
+ {
+ 	struct s_context *cur = current->s_context;
+ 
+ 	return cur != NULL && cur->id != 0
+ 		&& (cur->flags & S_CTX_INFO_LOCK) != 0;
+ }
+ 
+ /*
+  * Allocate and initialise a security context with the given id.  The
+  * capability bounding set starts from CAP_INIT_EFF_SET minus remove_cap
+  * and PROC_RLIMIT is taken from the caller's RLIMIT_NPROC maximum.
+  * The context is NOT linked into the global list; the caller does that
+  * once nothing else can fail.  Returns NULL when out of memory.
+  */
+ static struct s_context *sctx_alloc_unlinked(int id, __u32 remove_cap, int flags)
+ {
+ 	struct s_context *ctx = vmalloc( sizeof( struct s_context ));
+ 
+ 	if( ctx == NULL )
+ 		return NULL;
+ 	memset( ctx, 0, sizeof( struct s_context));
+ 	ctx -> id = id;
+ 	ctx -> cap_bset = CAP_INIT_EFF_SET;
+ 	ctx -> cap_bset &= (~remove_cap);
+ 	ctx -> flags |= flags;
+ 	ctx -> PROC_RLIMIT = current ->rlim[RLIMIT_NPROC].rlim_max;
+ 	return ctx;
+ }
+ 
+ /*
+    Change to a new security context and reduce the capability
+    basic set of the current process
+ 
+    ctx == -1  allocate an unused context id (searched from 2 up to
+               MAX_S_CONTEXT - 1) and move the caller into it.
+    ctx == -2  stay in the current context; only remove_cap and flags
+               are applied to it.
+    otherwise  join context ctx, creating it if it does not exist yet.
+ 
+    Returns the context id for -1 and -2, 0 when joining an explicit
+    context, or a negative errno: -EINVAL (ctx out of range, or no
+    context for S_CTX_INFO_INIT), -EPERM (not allowed, target is
+    private, or no free id), -ENOMEM (allocation failed).
+ */
+ asmlinkage int
+ sys_new_s_context(int ctx, __u32 remove_cap, int flags)
+ {
+ 	int ret = -EPERM;
+ 	struct s_context *new_context;
+ 
+ #ifdef S_CONTEXT_DEBUG
+ 	printk("sys_new_s_context: ctx %d \n", ctx);
+ #endif
+ 	/* -1 and -2 are commands; anything below that is invalid */
+ 	if( ( ctx < -2 ) || (ctx > MAX_S_CONTEXT) )
+ 	{
+ 		return -EINVAL;
+ 	}
+ 	switch( ctx )
+ 	{
+ 		/* allocate a new s_context using the first free id */
+ 		case -1:
+ 		{
+ 			int new_ctx_id;
+ 
+ 			/* a locked context must not escape into a fresh one */
+ 			if( sctx_current_is_locked() )
+ 			{
+ 				break;
+ 			}
+ 			for( new_ctx_id=2; new_ctx_id < MAX_S_CONTEXT; new_ctx_id ++ )
+ 			{
+ 				if( find_s_context_by_num( new_ctx_id ) == NULL )
+ 				{
+ 					break;
+ 				}
+ 			}
+ 
+ 			if( new_ctx_id == MAX_S_CONTEXT )
+ 			{
+ 				/* too full */
+ 				break;
+ 			}
+ 			/*
+ 			 * Allocate before touching the user struct so that a
+ 			 * failed allocation leaves the caller unchanged and
+ 			 * is reported as an error instead of 0.
+ 			 */
+ 			new_context = sctx_alloc_unlinked( new_ctx_id, remove_cap, flags );
+ 			if( new_context == NULL )
+ 			{
+ 				ret = -ENOMEM;
+ 				break;
+ 			}
+ 			ret = switch_user_struct(new_ctx_id);
+ 			if( ret != 0 )
+ 			{
+ 				vfree( new_context );
+ 				break;
+ 			}
+ 			add_s_context_to_list( new_context );
+ 
+ 			s_context_deltask( current );
+ 			s_context_addtask( new_ctx_id, current );
+ 			set_initpid (flags);
+ 
+ 			ret = new_ctx_id;
+ 			break;
+ 		}
+ 		/* We keep the same s_context, but lower the capabilities */
+ 		case -2:
+ 		{
+ 			struct s_context *cur = current->s_context;
+ 
+ 			if( cur == NULL ) break;
+ 			/* also records initpid when S_CTX_INFO_INIT is set */
+ 			ret = set_initpid(flags);
+ 			if (ret == 0)
+ 			{
+ 				cur->cap_bset &= (~remove_cap);
+ 				cur->flags |= flags;
+ 				/* applies to the context we stay in */
+ 				cur -> PROC_RLIMIT = current ->rlim[RLIMIT_NPROC].rlim_max;
+ 				ret = cur->id;
+ 			}
+ 			break;
+ 		}
+ 		/* allocate predefined s_context or join to that */
+ 		default:
+ 		{
+ 			int created = 0;
+ 			int allowed;
+ 
+ 			/*
+ 			 * The root context can become any context it wants;
+ 			 * other contexts only while they are not locked.  A
+ 			 * task without any context needs CAP_SYS_ADMIN.
+ 			 */
+ 			if( current->s_context == NULL )
+ 			{
+ 				allowed = capable(CAP_SYS_ADMIN);
+ 			}
+ 			else
+ 			{
+ 				allowed = !sctx_current_is_locked();
+ 			}
+ 			if( !allowed )
+ 			{
+ 				break;
+ 			}
+ 
+ 			new_context = find_s_context_by_num( ctx );
+ 			if( new_context == NULL )
+ 			{
+ 				/* not found - create */
+ 				new_context = sctx_alloc_unlinked( ctx, remove_cap, flags );
+ 				if( new_context == NULL )
+ 				{
+ 					/* not have memory - exit*/
+ 					ret = -ENOMEM;
+ 					break;
+ 				}
+ 				created = 1;
+ 			}
+ 			else if( (new_context -> flags & S_CTX_INFO_PRIVATE) != 0 )
+ 			{
+ 				/* an existing private context cannot be joined */
+ 				ret = -EPERM;
+ 				break;
+ 			}
+ #ifdef S_CONTEXT_DEBUG
+ 			printk("switch to ctx %d \n", ctx);
+ #endif
+ 			ret = switch_user_struct(ctx);
+ 			if (ret != 0)
+ 			{
+ 				/* do not leave an empty context behind */
+ 				if( created )
+ 				{
+ 					vfree( new_context );
+ 				}
+ 				break;
+ 			}
+ 			if( created )
+ 			{
+ 				add_s_context_to_list( new_context );
+ 			}
+ #ifdef S_CONTEXT_DEBUG
+ 			printk("switch ok\n");
+ #endif
+ 			/*
+ 			 * Skip the move when we are already a member:
+ 			 * s_context_deltask() frees a context whose task
+ 			 * count drops to zero.
+ 			 */
+ 			if( current->s_context != new_context )
+ 			{
+ 				s_context_deltask( current );
+ 				s_context_addtask( ctx, current );
+ 			}
+ 			set_initpid (flags);
+ 			break;
+ 		}
+ 	}
+ #ifdef S_CONTEXT_DEBUG
+ 	printk("return %d\n", ret);
+ #endif
+ 
+ 	return ret;
+ }
+ 
+ /*
+  * Install the list of IPv4 addresses (and the broadcast address) stored
+  * in the caller's security context.
+  *
+  * ip     user-space array of nbip addresses
+  * nbip   number of entries, 0..NB_IPV4ROOT
+  * bcast  broadcast address stored as ctx->v4_bcast
+  *
+  * A context with no address yet (ipv4[0] == 0) or a caller with
+  * CAP_NET_ADMIN may set anything.  Otherwise only a subset of the
+  * installed addresses may be selected and bcast must stay unchanged.
+  *
+  * Returns 0 on success, -EPERM, -EINVAL (bad nbip) or -EFAULT.
+  */
+ asmlinkage int sys_set_ipv4root (__u32 ip[], int nbip, __u32 bcast)
+ {
+ 	int ret = -EPERM;
+ 	__u32 tbip[NB_IPV4ROOT];
+ 	struct s_context *ctx = current->s_context;
+ 
+ #ifdef S_CONTEXT_DEBUG
+ 	printk("sys_set_ipv4root:");
+ #endif
+ 	if( ctx == NULL )
+ 	{
+ 		return -EPERM;
+ 	}
+ 
+ 	if (nbip < 0 || nbip > NB_IPV4ROOT)
+ 	{
+ 		return -EINVAL;
+ 	}
+ 
+ 	if (copy_from_user(tbip,ip,nbip*sizeof(ip[0]))!=0)
+ 	{
+ 		return -EFAULT;
+ 	}
+ 
+ 	if ( ctx->ipv4[0] == 0
+ 		|| capable(CAP_NET_ADMIN))
+ 	{
+ 		// We are allowed to change everything
+ 		ret = 0;
+ 	}
+ 	else
+ 	{
+ 		// We are allowed to select a subset of the currently
+ 		// installed IP numbers. No new one allowed
+ 		// We can't change the broadcast address though
+ 		int i;
+ 		int found = 0;
+ 
+ 		for (i=0; i<nbip; i++)
+ 		{
+ 			int j;
+ 			__u32 ipi = tbip[i];
+ 
+ 			for (j=0; j<ctx->nbipv4; j++)
+ 			{
+ 				if (ipi == ctx->ipv4[j])
+ 				{
+ 					found++;
+ 					break;
+ 				}
+ 			}
+ 		}
+ 		if (found == nbip && bcast == ctx->v4_bcast)
+ 		{
+ 			ret = 0;
+ 		}
+ 	}
+ 
+ 	if( ret == 0 )
+ 	{
+ 		// assign
+ 		ctx->nbipv4 = nbip;
+ 		memcpy (ctx->ipv4,tbip,nbip*sizeof(tbip[0]));
+ 		ctx->v4_bcast = bcast;
+ 	}
+ 	return ret;
+ }
+ 
diff -rc2P linux/kernel/sched.c linux.ctx/kernel/sched.c
*** linux/kernel/sched.c Sun Oct 13 15:38:38 2002
--- linux.ctx/kernel/sched.c Sun Nov 10 06:00:38 2002
***************
*** 21,24 ****
--- 21,25 ----
  #include
  #include
+ #include
  
  /*
***************
*** 634,637 ****
--- 635,707 ----
  #endif
  
+ /*
+  * For every context flagged S_CTX_INFO_SCHED whose sleeptime stamp is at
+  * least MAX_S_CONTEXT_SLEEP jiffies old, add extra sleep_avg to each of
+  * its tasks, recompute the task priority and requeue the task if it is
+  * on a priority array.
+  *
+  * NOTE(review): the visible call site runs this before its
+  * spin_unlock(&rq->lock), while task_rq_lock() is taken in here -
+  * confirm this cannot self-deadlock on SMP.
+  */
+ void balance_contexts(void)
+ {
+ 	struct s_context *ctx;
+ 	struct task_box *tbox;
+ 	int resched = 0;
+ 
+ 	for_each_s_context(ctx)
+ 	{
+ 		if( ( (ctx-> flags) & S_CTX_INFO_SCHED ) != 0 )
+ 		{
+ 			unsigned long ctx_sleep = jiffies - atomic_read(&ctx->sleeptime);
+ 
+ 			if( ctx_sleep < MAX_S_CONTEXT_SLEEP )
+ 			{
+ 				continue;
+ 			}
+ 
+ 			for_each_task_in_context(ctx,tbox)
+ 			{
+ 				task_t *p = tbox -> task;
+ 				unsigned long sleep_time;
+ 				unsigned long flags;
+ 				runqueue_t *rq;
+ 				prio_array_t *array;
+ 				int old_prio;
+ 
+ 				rq = task_rq_lock(p, &flags);
+ 				array = p->array;
+ 				sleep_time = (( jiffies - p->sleep_timestamp) + ctx_sleep )>>1;
+ 				p->sleep_avg += sleep_time;
+ 				old_prio = p -> prio;
+ 				if (p->sleep_avg > MAX_SLEEP_AVG)
+ 				{
+ 					p->sleep_avg = MAX_SLEEP_AVG;
+ 				}
+ 				if( array )
+ 				{
+ 					dequeue_task(p, array);
+ 				}
+ 				p->prio = effective_prio(p);
+ 				if (p->prio <= old_prio )
+ 				{
+ 					/* mark to reschedule if new task
+ 					   priority high with current */
+ 					resched = 1;
+ 				}
+ 				if( array )
+ 				{
+ 					enqueue_task(p, array);
+ 				}
+ 				task_rq_unlock(rq, &flags);
+ 			}
+ 			if( resched )
+ 			{
+ 				set_need_resched();
+ 			}
+ 		}
+ 	}
+ }
  /*
   * We place interactive tasks back into the active array, if possible.
***************
*** 714,718 ****
  			enqueue_task(p, rq->expired);
  		} else
! 
enqueue_task(p, rq->active); } out: --- 774,778 ---- enqueue_task(p, rq->expired); } else ! enqueue_task(p, rq->active); } out: *************** *** 721,724 **** --- 781,789 ---- load_balance(rq, 0); #endif + if (!(jiffies % ( 2 * MAX_S_CONTEXT_SLEEP)) ) + { + balance_contexts(); + } + spin_unlock(&rq->lock); } *************** *** 745,748 **** --- 810,817 ---- release_kernel_lock(prev, smp_processor_id()); prev->sleep_timestamp = jiffies; + if( prev -> s_context ) + { + atomic_set(&prev->s_context->sleeptime, jiffies); + } spin_lock_irq(&rq->lock); diff -rc2P linux/kernel/signal.c linux.ctx/kernel/signal.c *** linux/kernel/signal.c Sun Oct 13 15:38:35 2002 --- linux.ctx/kernel/signal.c Mon Oct 28 16:18:16 2002 *************** *** 619,623 **** read_lock(&tasklist_lock); for_each_task(p) { ! if (p->pgrp == pgrp && thread_group_leader(p)) { int err = send_sig_info(sig, info, p); if (retval) --- 619,627 ---- read_lock(&tasklist_lock); for_each_task(p) { ! if (p->pgrp == pgrp && thread_group_leader(p) && ! ( (long) info == 1 || ! p->s_context == current->s_context ) ! ) ! { int err = send_sig_info(sig, info, p); if (retval) *************** *** 673,676 **** --- 677,699 ---- p = tg; } + switch( (unsigned long)info ) + { + case 0: + if( p->s_context == current->s_context ) + { + error = send_sig_info(sig, info, p); + } + break; + case 1: + error = send_sig_info(sig, info, p); + break; + default: + if( info->si_code == SI_KERNEL || + p->s_context == current->s_context) + { + error = send_sig_info(sig, info, p); + } + break; + } error = send_sig_info(sig, info, p); } *************** *** 697,701 **** read_lock(&tasklist_lock); for_each_task(p) { ! if (p->pid > 1 && p != current && thread_group_leader(p)) { int err = send_sig_info(sig, info, p); ++count; --- 720,726 ---- read_lock(&tasklist_lock); for_each_task(p) { ! if (p->pid > 1 && p != current && thread_group_leader(p) && ! p->s_context == current->s_context ) ! 
{ int err = send_sig_info(sig, info, p); ++count; diff -rc2P linux/kernel/sys.c linux.ctx/kernel/sys.c *** linux/kernel/sys.c Sun Oct 13 15:38:25 2002 --- linux.ctx/kernel/sys.c Fri Nov 1 11:00:40 2002 *************** *** 15,18 **** --- 15,19 ---- #include #include + #include #include *************** *** 501,505 **** * we should be checking for it. -DaveM */ ! new_user = alloc_uid(new_ruid); if (!new_user) return -EAGAIN; --- 502,506 ---- * we should be checking for it. -DaveM */ ! new_user = alloc_uid(current->s_context->id, new_ruid); if (!new_user) return -EAGAIN; *************** *** 1016,1022 **** { int errno = 0; down_read(&uts_sem); ! if (copy_to_user(name,&system_utsname,sizeof *name)) errno = -EFAULT; up_read(&uts_sem); --- 1017,1032 ---- { int errno = 0; + struct new_utsname tmp,*pttmp; down_read(&uts_sem); ! if (current-> s_context != NULL){ ! tmp = system_utsname; ! strcpy (tmp.nodename,current->s_context->nodename); ! strcpy (tmp.domainname,current->s_context->domainname); ! pttmp = &tmp; ! }else{ ! pttmp = &system_utsname; ! } ! if (copy_to_user(name,pttmp,sizeof *name)) errno = -EFAULT; up_read(&uts_sem); *************** *** 1024,1030 **** --- 1034,1042 ---- } + asmlinkage long sys_sethostname(char *name, int len) { int errno; + char *nodename; if (!capable(CAP_SYS_ADMIN)) *************** *** 1034,1039 **** down_write(&uts_sem); errno = -EFAULT; ! if (!copy_from_user(system_utsname.nodename, name, len)) { ! system_utsname.nodename[len] = 0; errno = 0; } --- 1046,1056 ---- down_write(&uts_sem); errno = -EFAULT; ! nodename = system_utsname.nodename; ! if (current->s_context != NULL) ! { ! nodename = current->s_context->nodename; ! } ! if (!copy_from_user(nodename, name, len)) { ! nodename[len] = 0; errno = 0; } *************** *** 1045,1057 **** { int i, errno; if (len < 0) return -EINVAL; down_read(&uts_sem); ! i = 1 + strlen(system_utsname.nodename); if (i > len) i = len; errno = 0; ! 
if (copy_to_user(name, system_utsname.nodename, i)) errno = -EFAULT; up_read(&uts_sem); --- 1062,1080 ---- { int i, errno; + char *nodename; if (len < 0) return -EINVAL; down_read(&uts_sem); ! nodename = system_utsname.nodename; ! if (current->s_context != NULL) ! { ! nodename = current->s_context->nodename; ! } ! i = 1 + strlen(nodename); if (i > len) i = len; errno = 0; ! if (copy_to_user(name, nodename, i)) errno = -EFAULT; up_read(&uts_sem); *************** *** 1066,1069 **** --- 1089,1093 ---- { int errno; + char *domainname; if (!capable(CAP_SYS_ADMIN)) *************** *** 1073,1080 **** down_write(&uts_sem); errno = -EFAULT; ! if (!copy_from_user(system_utsname.domainname, name, len)) { errno = 0; ! system_utsname.domainname[len] = 0; } up_write(&uts_sem); --- 1097,1109 ---- down_write(&uts_sem); + domainname = system_utsname.domainname; + if (current->s_context) + { + domainname = current->s_context->domainname; + } errno = -EFAULT; ! if (!copy_from_user(domainname, name, len)) { errno = 0; ! 
domainname[len] = 0; } up_write(&uts_sem); diff -rc2P linux/kernel/sysctl.c linux.ctx/kernel/sysctl.c *** linux/kernel/sysctl.c Sun Oct 13 15:38:39 2002 --- linux.ctx/kernel/sysctl.c Fri Nov 1 10:39:46 2002 *************** *** 31,34 **** --- 31,35 ---- #include #include + #include #include *************** *** 393,396 **** --- 394,398 ---- static int test_perm(int mode, int op) { + if (!capable(CAP_SYS_ADMIN)) mode &= ~(0222); if (!current->euid) mode >>= 6; *************** *** 807,811 **** --- 809,824 ---- { int r; + ctl_table tmp; + /* HACK for per s_context hostname and domainname */ + if (current-> s_context != NULL){ + tmp = *table; + table = &tmp; + if (table->data == (void*)&system_utsname.nodename){ + tmp.data = &current -> s_context -> nodename; + }else if (table->data == (void*)&system_utsname.domainname){ + tmp.data = &current -> s_context -> domainname; + } + } if (!write) { down_read(&uts_sem); Binary files linux/kernel/tcedit.dst and linux.ctx/kernel/tcedit.dst differ diff -rc2P linux/kernel/timer.c linux.ctx/kernel/timer.c *** linux/kernel/timer.c Sun Oct 13 15:38:35 2002 --- linux.ctx/kernel/timer.c Fri Nov 1 10:44:22 2002 *************** *** 23,26 **** --- 23,27 ---- #include #include + #include #include *************** *** 727,730 **** --- 728,736 ---- { /* This is SMP safe - current->pid doesn't change */ + if (current->s_context != NULL + && current -> s_context -> initpid == current->tgid){ + /* We are faking process 1 for this security context */ + return 1; + } return current->tgid; } *************** *** 773,776 **** --- 779,788 ---- break; } + if (pid != 0 + && current -> s_context != NULL + && current -> s_context -> initpid == pid){ + /* We are faking process 1 for this security context */ + pid = 1; + } return pid; } diff -rc2P linux/kernel/user.c linux.ctx/kernel/user.c *** linux/kernel/user.c Wed Nov 29 09:43:39 2000 --- linux.ctx/kernel/user.c Fri Oct 25 16:32:05 2002 *************** *** 7,10 **** --- 7,23 ---- * processes, files etc the user has 
claimed, in order to be * able to have per-user limits for system resources. + * + * For the vserver project, the key is extended from UID to (SC,UID), + * with SC being the security context ID. Thus, each security context + * has independant per-UID resource usage counters. + * + * As a consequence, even if two UIDs are the same, the 'struct user *' + * in their task_struct could be different. I don't think any code cares. + * + * (vserver modifications done Sun Jan 13 08:48:45 CET 2002 by bof@bof.de) + * + * NOTE: For now, the hash function is unmodified: the same uid in several + * security contexts, will always sit on the same hash chain. This could + * be changed easily. */ *************** *** 57,61 **** } ! static inline struct user_struct *uid_hash_find(uid_t uid, struct user_struct **hashent) { struct user_struct *next; --- 70,74 ---- } ! static inline struct user_struct *uid_hash_find(int s_context, uid_t uid, struct user_struct **hashent) { struct user_struct *next; *************** *** 66,70 **** if (next) { next = up->next; ! if (up->uid != uid) continue; atomic_inc(&up->__count); --- 79,83 ---- if (next) { next = up->next; ! if (up->uid != uid || up->s_context != s_context) continue; atomic_inc(&up->__count); *************** *** 83,87 **** } ! struct user_struct * alloc_uid(uid_t uid) { struct user_struct **hashent = uidhashentry(uid); --- 96,100 ---- } ! struct user_struct * alloc_uid(int s_context, uid_t uid) { struct user_struct **hashent = uidhashentry(uid); *************** *** 89,93 **** spin_lock(&uidhash_lock); ! up = uid_hash_find(uid, hashent); spin_unlock(&uidhash_lock); --- 102,106 ---- spin_lock(&uidhash_lock); ! up = uid_hash_find(s_context, uid, hashent); spin_unlock(&uidhash_lock); *************** *** 99,102 **** --- 112,116 ---- return NULL; new->uid = uid; + new->s_context = s_context; atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); *************** *** 108,112 **** */ spin_lock(&uidhash_lock); ! 
up = uid_hash_find(uid, hashent); if (up) { kmem_cache_free(uid_cachep, new); --- 122,126 ---- */ spin_lock(&uidhash_lock); ! up = uid_hash_find(s_context, uid, hashent); if (up) { kmem_cache_free(uid_cachep, new); diff -rc2P linux/net/ipv4/af_inet.c linux.ctx/net/ipv4/af_inet.c *** linux/net/ipv4/af_inet.c Sun Oct 13 15:38:36 2002 --- linux.ctx/net/ipv4/af_inet.c Fri Nov 1 16:02:07 2002 *************** *** 394,397 **** --- 394,399 ---- sk->protinfo.af_inet.mc_list = NULL; + sk->s_context = current->s_context->id; + #ifdef INET_REFCNT_DEBUG atomic_inc(&inet_sock_nr); *************** *** 478,481 **** --- 480,486 ---- int chk_addr_ret; int err; + __u32 s_addr; + __u32 bcast_addr = 0xffffffffl; + __u32 ipv4root; /* If the socket has its own bind function then use it. (RAW) */ *************** *** 486,490 **** return -EINVAL; ! chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too --- 491,518 ---- return -EINVAL; ! s_addr = addr->sin_addr.s_addr; ! ipv4root = current->s_context != NULL ? current->s_context->ipv4[0] : 0; ! if (ipv4root != 0){ ! // printk ("ipv4root0 %08lx %08x\n",ipv4root,s_addr); ! __u32 v4_bcast = current->s_context->v4_bcast; ! if (s_addr == 0){ ! s_addr = ipv4root; ! bcast_addr = v4_bcast; ! }else if (s_addr == 0x0100007f){ ! s_addr = ipv4root; ! }else if (s_addr != v4_bcast ! && s_addr != ipv4root){ ! int i; ! int nbipv4 = current->s_context->nbipv4; ! for (i=0; i<nbipv4; i++){ ! if (s_addr == current->s_context->ipv4[i]){ ! break; ! } ! } ! if (i == nbipv4) return -EADDRNOTAVAIL; ! } ! } ! chk_addr_ret = inet_addr_type(s_addr); ! // printk ("ipv4root %08lx %08x %d\n",ipv4root,s_addr,chk_addr_ret); /* Not specified by any standard per-se, however it breaks too *************** *** 497,501 **** if (sysctl_ip_nonlocal_bind == 0 && sk->protinfo.af_inet.freebind == 0 && ! 
addr->sin_addr.s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && --- 525,529 ---- if (sysctl_ip_nonlocal_bind == 0 && sk->protinfo.af_inet.freebind == 0 && ! s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && *************** *** 522,526 **** goto out; ! sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) sk->saddr = 0; /* Use device */ --- 550,555 ---- goto out; ! sk->rcv_saddr = sk->saddr = s_addr; ! sk->bcast_addr = bcast_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) sk->saddr = 0; /* Use device */ diff -rc2P linux/net/ipv4/devinet.c linux.ctx/net/ipv4/devinet.c *** linux/net/ipv4/devinet.c Sun Oct 13 15:38:36 2002 --- linux.ctx/net/ipv4/devinet.c Wed Nov 6 17:48:56 2002 *************** *** 469,472 **** --- 469,473 ---- int ret = 0; int tryaddrmatch = 0; + unsigned int context = ( current->s_context != NULL ) ? current->s_context->id : 0 ; /* *************** *** 539,544 **** This is checked above. */ for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) { ! if ((strcmp(ifr.ifr_name, ifa->ifa_label) == 0) ! && (sin_orig.sin_addr.s_addr == ifa->ifa_address)) { break; /* found */ } --- 540,546 ---- This is checked above. */ for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) { ! if ( ( context == ifa->s_context ) ! && (strcmp(ifr.ifr_name, ifa->ifa_label) == 0) ! && (sin_orig.sin_addr.s_addr == ifa->ifa_address)) { break; /* found */ } *************** *** 550,554 **** if (ifa == NULL) { for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) ! if (strcmp(ifr.ifr_name, ifa->ifa_label) == 0) break; } --- 552,557 ---- if (ifa == NULL) { for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) ! if ( ( context == ifa -> s_context ) ! && (strcmp(ifr.ifr_name, ifa->ifa_label) == 0) ) break; } *************** *** 559,563 **** goto done; } ! 
switch(cmd) { case SIOCGIFADDR: /* Get interface address */ --- 562,566 ---- goto done; } ! switch(cmd) { case SIOCGIFADDR: /* Get interface address */ *************** *** 601,604 **** --- 604,608 ---- break; } + ifa -> s_context = context; if (colon) memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); *************** *** 687,690 **** --- 691,695 ---- struct ifreq ifr; int done=0; + unsigned int context = ( current->s_context != NULL ) ? current->s_context->id : 0 ; if (in_dev==NULL || (ifa=in_dev->ifa_list)==NULL) *************** *** 692,695 **** --- 697,702 ---- for ( ; ifa; ifa = ifa->ifa_next) { + // We do not show other IP devices to vservers + if (ifa->s_context != context) continue; if (!buf) { done += sizeof(ifr); *************** *** 909,912 **** --- 916,920 ---- for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { + // if (devinet_notiproot(ifa)) continue; if (ip_idx < s_ip_idx) continue; diff -rc2P linux/net/ipv4/raw.c linux.ctx/net/ipv4/raw.c *** linux/net/ipv4/raw.c Sun Oct 13 15:38:36 2002 --- linux.ctx/net/ipv4/raw.c Fri Nov 1 14:21:27 2002 *************** *** 658,662 **** for (sk = raw_v4_htable[i]; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET) continue; pos += 128; --- 658,663 ---- for (sk = raw_v4_htable[i]; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET || ! (current->s_context->id != 1 && sk->s_context != current->s_context->id)) continue; pos += 128; diff -rc2P linux/net/ipv4/tcp_ipv4.c linux.ctx/net/ipv4/tcp_ipv4.c *** linux/net/ipv4/tcp_ipv4.c Sun Oct 13 15:38:43 2002 --- linux.ctx/net/ipv4/tcp_ipv4.c Fri Nov 1 14:20:51 2002 *************** *** 2184,2187 **** --- 2184,2190 ---- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + if (current->s_context->id != 1 && sk->s_context != current->s_context->id) + continue; + if (!TCP_INET_FAMILY(sk->family)) goto skip_listen; *************** *** 2237,2241 **** read_lock(&head->lock); for(sk = head->chain; sk; sk = sk->next, num++) { ! 
if (!TCP_INET_FAMILY(sk->family)) continue; pos += TMPSZ; --- 2240,2244 ---- read_lock(&head->lock); for(sk = head->chain; sk; sk = sk->next, num++) { ! if (!TCP_INET_FAMILY(sk->family) || (current->s_context->id != 1 && sk->s_context != current->s_context->id)) continue; pos += TMPSZ; *************** *** 2252,2256 **** tw != NULL; tw = (struct tcp_tw_bucket *)tw->next, num++) { ! if (!TCP_INET_FAMILY(tw->family)) continue; pos += TMPSZ; --- 2255,2259 ---- tw != NULL; tw = (struct tcp_tw_bucket *)tw->next, num++) { ! if (!TCP_INET_FAMILY(tw->family) || (current->s_context->id != 1 && tw->s_context != current->s_context->id)) continue; pos += TMPSZ; diff -rc2P linux/net/ipv4/tcp_minisocks.c linux.ctx/net/ipv4/tcp_minisocks.c *** linux/net/ipv4/tcp_minisocks.c Sun Oct 13 15:38:50 2002 --- linux.ctx/net/ipv4/tcp_minisocks.c Fri Oct 25 16:32:05 2002 *************** *** 381,384 **** --- 381,386 ---- tw->pprev_death = NULL; + tw->s_context = sk->s_context; + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if(tw->family == PF_INET6) { diff -rc2P linux/net/ipv4/udp.c linux.ctx/net/ipv4/udp.c *** linux/net/ipv4/udp.c Sun Oct 13 15:38:36 2002 --- linux.ctx/net/ipv4/udp.c Fri Nov 1 14:25:56 2002 *************** *** 273,277 **** (s->daddr && s->daddr!=rmt_addr) || (s->dport != rmt_port && s->dport != 0) || ! (s->rcv_saddr && s->rcv_saddr != loc_addr) || (s->bound_dev_if && s->bound_dev_if != dif)) continue; --- 273,277 ---- (s->daddr && s->daddr!=rmt_addr) || (s->dport != rmt_port && s->dport != 0) || ! (s->rcv_saddr && s->rcv_saddr != loc_addr && s->bcast_addr != loc_addr) || (s->bound_dev_if && s->bound_dev_if != dif)) continue; *************** *** 1005,1009 **** for (sk = udp_hash[i]; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET) continue; pos += 128; --- 1005,1010 ---- for (sk = udp_hash[i]; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET || ! 
(current->s_context->id != 1 && sk->s_context != current->s_context->id)) continue; pos += 128; diff -rc2P linux/net/ipv6/raw.c linux.ctx/net/ipv6/raw.c *** linux/net/ipv6/raw.c Sun Oct 13 15:38:43 2002 --- linux.ctx/net/ipv6/raw.c Fri Oct 25 16:32:05 2002 *************** *** 880,884 **** for (sk = raw_v6_htable[i]; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET6) continue; pos += LINE_LEN+1; --- 880,884 ---- for (sk = raw_v6_htable[i]; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET6 || (current->s_context != 1 && sk->s_context != current->s_context)) continue; pos += LINE_LEN+1; diff -rc2P linux/net/ipv6/tcp_ipv6.c linux.ctx/net/ipv6/tcp_ipv6.c *** linux/net/ipv6/tcp_ipv6.c Sun Oct 13 15:38:36 2002 --- linux.ctx/net/ipv6/tcp_ipv6.c Fri Oct 25 16:32:05 2002 *************** *** 2007,2011 **** struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); ! if (sk->family != PF_INET6) continue; pos += LINE_LEN+1; --- 2007,2011 ---- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); ! if (sk->family != PF_INET6 || (current->s_context != 1 && sk->s_context != current->s_context)) continue; pos += LINE_LEN+1; *************** *** 2057,2061 **** read_lock(&head->lock); for(sk = head->chain; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET6) continue; pos += LINE_LEN+1; --- 2057,2061 ---- read_lock(&head->lock); for(sk = head->chain; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET6 || (current->s_context != 1 && sk->s_context != current->s_context)) continue; pos += LINE_LEN+1; *************** *** 2072,2076 **** tw != NULL; tw = (struct tcp_tw_bucket *)tw->next, num++) { ! if (tw->family != PF_INET6) continue; pos += LINE_LEN+1; --- 2072,2076 ---- tw != NULL; tw = (struct tcp_tw_bucket *)tw->next, num++) { ! 
if (tw->family != PF_INET6 || (current->s_context != 1 && tw->s_context != current->s_context)) continue; pos += LINE_LEN+1; diff -rc2P linux/net/ipv6/udp.c linux.ctx/net/ipv6/udp.c *** linux/net/ipv6/udp.c Sun Oct 13 15:38:36 2002 --- linux.ctx/net/ipv6/udp.c Fri Oct 25 16:32:05 2002 *************** *** 958,962 **** for (sk = udp_hash[i]; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET6) continue; pos += LINE_LEN+1; --- 958,962 ---- for (sk = udp_hash[i]; sk; sk = sk->next, num++) { ! if (sk->family != PF_INET6 || (current->s_context != 1 && sk->s_context != current->s_context)) continue; pos += LINE_LEN+1; diff -rc2P linux/net/socket.c linux.ctx/net/socket.c *** linux/net/socket.c Sun Oct 13 15:38:37 2002 --- linux.ctx/net/socket.c Mon Oct 28 16:55:37 2002 *************** *** 1764,1771 **** --- 1764,1773 ---- len = length; if (len < 0) + len = 0; return len; } + int tux_Dprintk; int tux_TDprintk; diff -rc2P linux/net/unix/af_unix.c linux.ctx/net/unix/af_unix.c *** linux/net/unix/af_unix.c Sun Oct 13 15:38:36 2002 --- linux.ctx/net/unix/af_unix.c Fri Nov 1 16:12:40 2002 *************** *** 110,113 **** --- 110,114 ---- #include #include + #include #include *************** *** 480,483 **** --- 481,486 ---- sk->write_space = unix_write_space; + sk->s_context = current->s_context->id; + sk->max_ack_backlog = sysctl_unix_max_dgram_qlen; sk->destruct = unix_sock_destructor; *************** *** 1749,1752 **** --- 1752,1758 ---- forall_unix_sockets (i,s) { + if (current->s_context->id != 1 && s->s_context != current->s_context->id) + continue; + unix_state_rlock(s);