FreeBSD Bugzilla – Attachment 169480 Details for
Bug 48471
[jail] Private IPC for every jail
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
IPC key_t space separation, using OSD methods
jail_sysv.diff (text/plain), 44.67 KB, created by
Jamie Gritton
on 2016-04-19 22:57:35 UTC
(
hide
)
Description:
IPC key_t space separation, using OSD methods
Filename:
MIME Type:
Creator:
Jamie Gritton
Created:
2016-04-19 22:57:35 UTC
Size:
44.67 KB
patch
obsolete
>Index: kern/sysv_msg.c >=================================================================== >--- kern/sysv_msg.c (revision 298132) >+++ kern/sysv_msg.c (working copy) >@@ -62,8 +62,11 @@ > #include <sys/lock.h> > #include <sys/mutex.h> > #include <sys/module.h> >+#include <sys/mount.h> > #include <sys/msg.h> > #include <sys/racct.h> >+#include <sys/sbuf.h> >+#include <sys/sx.h> > #include <sys/syscall.h> > #include <sys/syscallsubr.h> > #include <sys/sysent.h> >@@ -80,6 +83,14 @@ > static int msginit(void); > static int msgunload(void); > static int sysvmsg_modload(struct module *, int, void *); >+static void msq_remove(struct msqid_kernel *); >+static struct prison *msg_find_prison(struct ucred *); >+static int msq_prison_cansee(struct prison *, struct msqid_kernel *); >+static int msg_prison_check(void *, void *); >+static int msg_prison_set(void *, void *); >+static int msg_prison_get(void *, void *); >+static int msg_prison_remove(void *, void *); >+static void msg_prison_cleanup(struct prison *); > > > #ifdef MSG_DEBUG >@@ -155,6 +166,7 @@ > static struct msg *msghdrs; /* MSGTQL msg headers */ > static struct msqid_kernel *msqids; /* MSGMNI msqid_kernel struct's */ > static struct mtx msq_mtx; /* global mutex for message queues. */ >+static unsigned msg_prison_slot;/* prison OSD slot */ > > static struct syscall_helper_data msg_syscalls[] = { > SYSCALL_INIT_HELPER(msgctl), >@@ -194,7 +206,15 @@ > static int > msginit() > { >+ struct prison *pr; >+ void *rsv; > int i, error; >+ osd_method_t methods[PR_MAXMETHOD] = { >+ [PR_METHOD_CHECK] = msg_prison_check, >+ [PR_METHOD_SET] = msg_prison_set, >+ [PR_METHOD_GET] = msg_prison_get, >+ [PR_METHOD_REMOVE] = msg_prison_remove, >+ }; > > msginfo.msgmax = msginfo.msgseg * msginfo.msgssz; > msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK); >@@ -252,6 +272,29 @@ > } > mtx_init(&msq_mtx, "msq", NULL, MTX_DEF); > >+ /* Set current prisons according to their allow.sysvipc. 
*/ >+ msg_prison_slot = osd_jail_register(NULL, methods); >+ rsv = osd_reserve(msg_prison_slot); >+ prison_lock(&prison0); >+ (void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0); >+ prison_unlock(&prison0); >+ rsv = NULL; >+ sx_slock(&allprison_lock); >+ TAILQ_FOREACH(pr, &allprison, pr_list) { >+ if (rsv == NULL) >+ rsv = osd_reserve(msg_prison_slot); >+ prison_lock(pr); >+ if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { >+ (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv, >+ &prison0); >+ rsv = NULL; >+ } >+ prison_unlock(pr); >+ } >+ if (rsv != NULL) >+ osd_free_reserved(rsv); >+ sx_sunlock(&allprison_lock); >+ > error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD); > if (error != 0) > return (error); >@@ -292,6 +335,8 @@ > if (msqid != msginfo.msgmni) > return (EBUSY); > >+ if (msg_prison_slot != 0) >+ osd_jail_deregister(msg_prison_slot); > #ifdef MAC > for (i = 0; i < msginfo.msgtql; i++) > mac_sysvmsg_destroy(&msghdrs[i]); >@@ -366,6 +411,67 @@ > #endif > } > >+static void >+msq_remove(struct msqid_kernel *msqkptr) >+{ >+ struct msg *msghdr; >+ >+ racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1); >+ racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum); >+ racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes); >+ crfree(msqkptr->cred); >+ msqkptr->cred = NULL; >+ >+ /* Free the message headers */ >+ msghdr = msqkptr->u.msg_first; >+ while (msghdr != NULL) { >+ struct msg *msghdr_tmp; >+ >+ /* Free the segments of each message */ >+ msqkptr->u.msg_cbytes -= msghdr->msg_ts; >+ msqkptr->u.msg_qnum--; >+ msghdr_tmp = msghdr; >+ msghdr = msghdr->msg_next; >+ msg_freehdr(msghdr_tmp); >+ } >+ >+ if (msqkptr->u.msg_cbytes != 0) >+ panic("msg_cbytes is screwed up"); >+ if (msqkptr->u.msg_qnum != 0) >+ panic("msg_qnum is screwed up"); >+ >+ msqkptr->u.msg_qbytes = 0; /* Mark it as free */ >+ >+#ifdef MAC >+ mac_sysvmsq_cleanup(msqkptr); >+#endif >+ >+ wakeup(msqkptr); >+} >+ >+static struct prison 
* >+msg_find_prison(struct ucred *cred) >+{ >+ struct prison *pr, *rpr; >+ >+ pr = cred->cr_prison; >+ prison_lock(pr); >+ rpr = osd_jail_get(pr, msg_prison_slot); >+ prison_unlock(pr); >+ return rpr; >+} >+ >+static int >+msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr) >+{ >+ >+ if (msqkptr->cred == NULL || >+ !(rpr == msqkptr->cred->cr_prison || >+ prison_ischild(rpr, msqkptr->cred->cr_prison))) >+ return (EINVAL); >+ return (0); >+} >+ > #ifndef _SYS_SYSPROTO_H_ > struct msgctl_args { > int msqid; >@@ -402,8 +508,10 @@ > { > int rval, error, msqix; > register struct msqid_kernel *msqkptr; >+ struct prison *rpr; > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ rpr = msg_find_prison(td->td_ucred); >+ if (rpr == NULL) > return (ENOSYS); > > msqix = IPCID_TO_IX(msqid); >@@ -427,6 +535,13 @@ > error = EINVAL; > goto done2; > } >+ >+ error = msq_prison_cansee(rpr, msqkptr); >+ if (error != 0) { >+ DPRINTF(("requester can't see prison\n")); >+ goto done2; >+ } >+ > #ifdef MAC > error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd); > if (error != 0) >@@ -440,7 +555,9 @@ > > case IPC_RMID: > { >+#ifdef MAC > struct msg *msghdr; >+#endif > if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M))) > goto done2; > >@@ -462,37 +579,7 @@ > } > #endif > >- racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1); >- racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum); >- racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes); >- crfree(msqkptr->cred); >- msqkptr->cred = NULL; >- >- /* Free the message headers */ >- msghdr = msqkptr->u.msg_first; >- while (msghdr != NULL) { >- struct msg *msghdr_tmp; >- >- /* Free the segments of each message */ >- msqkptr->u.msg_cbytes -= msghdr->msg_ts; >- msqkptr->u.msg_qnum--; >- msghdr_tmp = msghdr; >- msghdr = msghdr->msg_next; >- msg_freehdr(msghdr_tmp); >- } >- >- if (msqkptr->u.msg_cbytes != 0) >- panic("msg_cbytes is screwed up"); >- if (msqkptr->u.msg_qnum != 0) >- 
panic("msg_qnum is screwed up"); >- >- msqkptr->u.msg_qbytes = 0; /* Mark it as free */ >- >-#ifdef MAC >- mac_sysvmsq_cleanup(msqkptr); >-#endif >- >- wakeup(msqkptr); >+ msq_remove(msqkptr); > } > > break; >@@ -529,6 +616,8 @@ > goto done2; > } > *msqbuf = msqkptr->u; >+ if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison) >+ msqbuf->msg_perm.key = IPC_PRIVATE; > break; > > default: >@@ -564,7 +653,7 @@ > > DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg)); > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ if (msg_find_prison(cred) == NULL) > return (ENOSYS); > > mtx_lock(&msq_mtx); >@@ -572,6 +661,8 @@ > for (msqid = 0; msqid < msginfo.msgmni; msqid++) { > msqkptr = &msqids[msqid]; > if (msqkptr->u.msg_qbytes != 0 && >+ msqkptr->cred != NULL && >+ msqkptr->cred->cr_prison == cred->cr_prison && > msqkptr->u.msg_perm.key == key) > break; > } >@@ -684,12 +775,14 @@ > int msqix, segs_needed, error = 0; > register struct msqid_kernel *msqkptr; > register struct msg *msghdr; >+ struct prison *rpr; > short next; > #ifdef RACCT > size_t saved_msgsz; > #endif > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ rpr = msg_find_prison(td->td_ucred); >+ if (rpr == NULL) > return (ENOSYS); > > mtx_lock(&msq_mtx); >@@ -714,6 +807,11 @@ > goto done2; > } > >+ if ((error = msq_prison_cansee(rpr, msqkptr))) { >+ DPRINTF(("requester can't see prison\n")); >+ goto done2; >+ } >+ > if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) { > DPRINTF(("requester doesn't have write access\n")); > goto done2; >@@ -1052,10 +1150,12 @@ > size_t len; > register struct msqid_kernel *msqkptr; > register struct msg *msghdr; >+ struct prison *rpr; > int msqix, error = 0; > short next; > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ rpr = msg_find_prison(td->td_ucred); >+ if (rpr == NULL) > return (ENOSYS); > > msqix = IPCID_TO_IX(msqid); >@@ -1079,6 +1179,11 @@ > goto done2; > } > >+ if ((error = msq_prison_cansee(rpr, msqkptr))) { >+ DPRINTF(("requester can't see 
prison\n")); >+ goto done2; >+ } >+ > if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) { > DPRINTF(("requester doesn't have read access\n")); > goto done2; >@@ -1318,9 +1423,39 @@ > static int > sysctl_msqids(SYSCTL_HANDLER_ARGS) > { >+ struct sbuf sb; >+ struct msqid_kernel tmp, empty; >+ struct msqid_kernel *msqkptr; >+ struct prison *rpr; >+ int error, i; > >- return (SYSCTL_OUT(req, msqids, >- sizeof(struct msqid_kernel) * msginfo.msgmni)); >+ error = sysctl_wire_old_buffer(req, 0); >+ if (error != 0) >+ goto done; >+ rpr = msg_find_prison(req->td->td_ucred); >+ sbuf_new_for_sysctl(&sb, NULL, sizeof(struct msqid_kernel) * >+ msginfo.msgmni, req); >+ >+ bzero(&empty, sizeof(empty)); >+ for (i = 0; i < msginfo.msgmni; i++) { >+ msqkptr = &msqids[i]; >+ if (msqkptr->u.msg_qbytes == 0 || rpr == NULL || >+ msq_prison_cansee(rpr, msqkptr) != 0) { >+ msqkptr = &empty; >+ } else if (req->td->td_ucred->cr_prison != >+ msqkptr->cred->cr_prison) { >+ bcopy(msqkptr, &tmp, sizeof(tmp)); >+ msqkptr = &tmp; >+ msqkptr->u.msg_perm.key = IPC_PRIVATE; >+ } >+ >+ sbuf_bcat(&sb, msqkptr, sizeof(*msqkptr)); >+ } >+ error = sbuf_finish(&sb); >+ sbuf_delete(&sb); >+ >+done: >+ return (error); > } > > SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, >@@ -1338,6 +1473,181 @@ > SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD, > NULL, 0, sysctl_msqids, "", "Message queue IDs"); > >+static int >+msg_prison_check(void *obj, void *data) >+{ >+ struct prison *pr = obj; >+ struct prison *prpr; >+ struct vfsoptlist *opts = data; >+ int error, jsys; >+ >+ /* >+ * sysvmsg is a jailsys integer. >+ * It must be "disable" if the parent jail is disabled. 
>+ */ >+ error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)); >+ if (error != ENOENT) { >+ if (error != 0) >+ return (error); >+ switch (jsys) { >+ case JAIL_SYS_DISABLE: >+ break; >+ case JAIL_SYS_NEW: >+ case JAIL_SYS_INHERIT: >+ prison_lock(pr->pr_parent); >+ prpr = osd_jail_get(pr->pr_parent, msg_prison_slot); >+ prison_unlock(pr->pr_parent); >+ if (prpr == NULL) >+ return (EPERM); >+ break; >+ default: >+ return (EINVAL); >+ } >+ } >+ >+ return (0); >+} >+ >+static int >+msg_prison_set(void *obj, void *data) >+{ >+ struct prison *pr = obj; >+ struct prison *tpr, *orpr, *nrpr, *trpr; >+ struct vfsoptlist *opts = data; >+ void *rsv; >+ int jsys, descend; >+ >+ /* >+ * sysvmsg controls which jail is the root of the associated msgs (this >+ * jail or same as the parent), or if the feature is available at all. >+ */ >+ if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT) >+ jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) >+ ? JAIL_SYS_INHERIT >+ : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) >+ ? JAIL_SYS_DISABLE >+ : -1; >+ if (jsys == JAIL_SYS_DISABLE) { >+ prison_lock(pr); >+ orpr = osd_jail_get(pr, msg_prison_slot); >+ if (orpr != NULL) >+ osd_jail_del(pr, msg_prison_slot); >+ prison_unlock(pr); >+ if (orpr != NULL) { >+ if (orpr == pr) >+ msg_prison_cleanup(pr); >+ /* Disable all child jails as well. 
*/ >+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { >+ prison_lock(tpr); >+ trpr = osd_jail_get(tpr, msg_prison_slot); >+ if (trpr != NULL) { >+ osd_jail_del(tpr, msg_prison_slot); >+ prison_unlock(tpr); >+ if (trpr == tpr) >+ msg_prison_cleanup(tpr); >+ } else { >+ prison_unlock(tpr); >+ descend = 0; >+ } >+ } >+ } >+ } else if (jsys != -1) { >+ if (jsys == JAIL_SYS_NEW) >+ nrpr = pr; >+ else { >+ prison_lock(pr->pr_parent); >+ nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot); >+ prison_unlock(pr->pr_parent); >+ } >+ rsv = osd_reserve(msg_prison_slot); >+ prison_lock(pr); >+ orpr = osd_jail_get(pr, msg_prison_slot); >+ if (orpr != nrpr) >+ (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv, >+ nrpr); >+ else >+ osd_free_reserved(rsv); >+ prison_unlock(pr); >+ if (orpr != nrpr) { >+ if (orpr == pr) >+ msg_prison_cleanup(pr); >+ if (orpr != NULL) { >+ /* Change child jails matching the old root, */ >+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { >+ prison_lock(tpr); >+ trpr = osd_jail_get(tpr, >+ msg_prison_slot); >+ if (trpr == orpr) { >+ (void)osd_jail_set(tpr, >+ msg_prison_slot, nrpr); >+ prison_unlock(tpr); >+ if (trpr == tpr) >+ msg_prison_cleanup(tpr); >+ } else { >+ prison_unlock(tpr); >+ descend = 0; >+ } >+ } >+ } >+ } >+ } >+ >+ return (0); >+} >+ >+static int >+msg_prison_get(void *obj, void *data) >+{ >+ struct prison *pr = obj; >+ struct prison *rpr; >+ struct vfsoptlist *opts = data; >+ int error, jsys; >+ >+ /* Set sysvmsg based on the jail's root prison. */ >+ prison_lock(pr); >+ rpr = osd_jail_get(pr, msg_prison_slot); >+ prison_unlock(pr); >+ jsys = rpr == NULL ? JAIL_SYS_DISABLE >+ : rpr == pr ? 
JAIL_SYS_NEW : JAIL_SYS_INHERIT; >+ error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys)); >+ if (error == ENOENT) >+ error = 0; >+ return (error); >+} >+ >+static int >+msg_prison_remove(void *obj, void *data __unused) >+{ >+ struct prison *pr = obj; >+ struct prison *rpr; >+ >+ prison_lock(pr); >+ rpr = osd_jail_get(pr, msg_prison_slot); >+ prison_unlock(pr); >+ if (rpr == pr) >+ msg_prison_cleanup(pr); >+ return (0); >+} >+ >+static void >+msg_prison_cleanup(struct prison *pr) >+{ >+ struct msqid_kernel *msqkptr; >+ int i; >+ >+ /* Remove any msqs that belong to this jail. */ >+ mtx_lock(&msq_mtx); >+ for (i = 0; i < msginfo.msgmni; i++) { >+ msqkptr = &msqids[i]; >+ if (msqkptr->u.msg_qbytes != 0 && >+ msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr) >+ msq_remove(msqkptr); >+ } >+ mtx_unlock(&msq_mtx); >+} >+ >+SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues"); >+ > #ifdef COMPAT_FREEBSD32 > int > freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap) >@@ -1516,8 +1826,6 @@ > { > int error; > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >- return (ENOSYS); > if (uap->which < 0 || > uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0])) > return (EINVAL); >Index: kern/sysv_sem.c >=================================================================== >--- kern/sysv_sem.c (revision 298132) >+++ kern/sysv_sem.c (working copy) >@@ -52,7 +52,9 @@ > #include <sys/module.h> > #include <sys/mutex.h> > #include <sys/racct.h> >+#include <sys/sbuf.h> > #include <sys/sem.h> >+#include <sys/sx.h> > #include <sys/syscall.h> > #include <sys/syscallsubr.h> > #include <sys/sysent.h> >@@ -78,7 +80,16 @@ > static int semunload(void); > static void semexit_myhook(void *arg, struct proc *p); > static int sysctl_sema(SYSCTL_HANDLER_ARGS); >-static int semvalid(int semid, struct semid_kernel *semakptr); >+static int semvalid(int semid, struct prison *rpr, >+ struct semid_kernel *semakptr); >+static void sem_remove(int semidx, struct 
ucred *cred); >+static struct prison *sem_find_prison(struct ucred *); >+static int sem_prison_cansee(struct prison *, struct semid_kernel *); >+static int sem_prison_check(void *, void *); >+static int sem_prison_set(void *, void *); >+static int sem_prison_get(void *, void *); >+static int sem_prison_remove(void *, void *); >+static void sem_prison_cleanup(struct prison *); > > #ifndef _SYS_SYSPROTO_H_ > struct __semctl_args; >@@ -104,6 +115,7 @@ > LIST_HEAD(, sem_undo) semu_free_list; /* list of free undo structures */ > static int *semu; /* undo structure pool */ > static eventhandler_tag semexit_tag; >+static unsigned sem_prison_slot; /* prison OSD slot */ > > #define SEMUNDO_MTX sem_undo_mtx > #define SEMUNDO_LOCK() mtx_lock(&SEMUNDO_MTX); >@@ -247,7 +259,15 @@ > static int > seminit(void) > { >+ struct prison *pr; >+ void *rsv; > int i, error; >+ osd_method_t methods[PR_MAXMETHOD] = { >+ [PR_METHOD_CHECK] = sem_prison_check, >+ [PR_METHOD_SET] = sem_prison_set, >+ [PR_METHOD_GET] = sem_prison_get, >+ [PR_METHOD_REMOVE] = sem_prison_remove, >+ }; > > sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, M_WAITOK); > sema = malloc(sizeof(struct semid_kernel) * seminfo.semmni, M_SEM, >@@ -278,6 +298,29 @@ > semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL, > EVENTHANDLER_PRI_ANY); > >+ /* Set current prisons according to their allow.sysvipc. 
*/ >+ sem_prison_slot = osd_jail_register(NULL, methods); >+ rsv = osd_reserve(sem_prison_slot); >+ prison_lock(&prison0); >+ (void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, &prison0); >+ prison_unlock(&prison0); >+ rsv = NULL; >+ sx_slock(&allprison_lock); >+ TAILQ_FOREACH(pr, &allprison, pr_list) { >+ if (rsv == NULL) >+ rsv = osd_reserve(sem_prison_slot); >+ prison_lock(pr); >+ if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { >+ (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv, >+ &prison0); >+ rsv = NULL; >+ } >+ prison_unlock(pr); >+ } >+ if (rsv != NULL) >+ osd_free_reserved(rsv); >+ sx_sunlock(&allprison_lock); >+ > error = syscall_helper_register(sem_syscalls, SY_THR_STATIC_KLD); > if (error != 0) > return (error); >@@ -303,6 +346,8 @@ > #endif > syscall_helper_unregister(sem_syscalls); > EVENTHANDLER_DEREGISTER(process_exit, semexit_tag); >+ if (sem_prison_slot != 0) >+ osd_jail_deregister(sem_prison_slot); > #ifdef MAC > for (i = 0; i < seminfo.semmni; i++) > mac_sysvsem_destroy(&sema[i]); >@@ -489,13 +534,76 @@ > } > > static int >-semvalid(int semid, struct semid_kernel *semakptr) >+semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr) > { > > return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || >- semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : 0); >+ semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) || >+ sem_prison_cansee(rpr, semakptr) ? EINVAL : 0); > } > >+static void >+sem_remove(int semidx, struct ucred *cred) >+{ >+ struct semid_kernel *semakptr; >+ int i; >+ >+ KASSERT(semidx >= 0 && semidx < seminfo.semmni, >+ ("semidx out of bounds")); >+ semakptr = &sema[semidx]; >+ semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0; >+ semakptr->u.sem_perm.uid = cred ? 
cred->cr_uid : 0; >+ semakptr->u.sem_perm.mode = 0; >+ racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems); >+ crfree(semakptr->cred); >+ semakptr->cred = NULL; >+ SEMUNDO_LOCK(); >+ semundo_clear(semidx, -1); >+ SEMUNDO_UNLOCK(); >+#ifdef MAC >+ mac_sysvsem_cleanup(semakptr); >+#endif >+ wakeup(semakptr); >+ for (i = 0; i < seminfo.semmni; i++) { >+ if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && >+ sema[i].u.sem_base > semakptr->u.sem_base) >+ mtx_lock_flags(&sema_mtx[i], LOP_DUPOK); >+ } >+ for (i = semakptr->u.sem_base - sem; i < semtot; i++) >+ sem[i] = sem[i + semakptr->u.sem_nsems]; >+ for (i = 0; i < seminfo.semmni; i++) { >+ if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && >+ sema[i].u.sem_base > semakptr->u.sem_base) { >+ sema[i].u.sem_base -= semakptr->u.sem_nsems; >+ mtx_unlock(&sema_mtx[i]); >+ } >+ } >+ semtot -= semakptr->u.sem_nsems; >+} >+ >+static struct prison * >+sem_find_prison(struct ucred *cred) >+{ >+ struct prison *pr, *rpr; >+ >+ pr = cred->cr_prison; >+ prison_lock(pr); >+ rpr = osd_jail_get(pr, sem_prison_slot); >+ prison_unlock(pr); >+ return rpr; >+} >+ >+static int >+sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr) >+{ >+ >+ if (semakptr->cred == NULL || >+ !(rpr == semakptr->cred->cr_prison || >+ prison_ischild(rpr, semakptr->cred->cr_prison))) >+ return (EINVAL); >+ return (0); >+} >+ > /* > * Note that the user-mode half of this passes a union, not a pointer. 
> */ >@@ -572,6 +680,7 @@ > u_short *array; > struct ucred *cred = td->td_ucred; > int i, error; >+ struct prison *rpr; > struct semid_ds *sbuf; > struct semid_kernel *semakptr; > struct mtx *sema_mtxp; >@@ -580,7 +689,9 @@ > > DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n", > semid, semnum, cmd, arg)); >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ >+ rpr = sem_find_prison(td->td_ucred); >+ if (sem == NULL) > return (ENOSYS); > > array = NULL; >@@ -600,6 +711,8 @@ > error = EINVAL; > goto done2; > } >+ if ((error = sem_prison_cansee(rpr, semakptr))) >+ goto done2; > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) > goto done2; > #ifdef MAC >@@ -608,6 +721,8 @@ > goto done2; > #endif > bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds)); >+ if (cred->cr_prison != semakptr->cred->cr_prison) >+ arg->buf->sem_perm.key = IPC_PRIVATE; > *rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm); > mtx_unlock(sema_mtxp); > return (0); >@@ -622,6 +737,7 @@ > if (cmd == IPC_RMID) > mtx_lock(&sem_mtx); > mtx_lock(sema_mtxp); >+ > #ifdef MAC > error = mac_sysvsem_check_semctl(cred, semakptr, cmd); > if (error != 0) >@@ -633,42 +749,15 @@ > > switch (cmd) { > case IPC_RMID: >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M))) > goto done2; >- semakptr->u.sem_perm.cuid = cred->cr_uid; >- semakptr->u.sem_perm.uid = cred->cr_uid; >- semakptr->u.sem_perm.mode = 0; >- racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems); >- crfree(semakptr->cred); >- semakptr->cred = NULL; >- SEMUNDO_LOCK(); >- semundo_clear(semidx, -1); >- SEMUNDO_UNLOCK(); >-#ifdef MAC >- mac_sysvsem_cleanup(semakptr); >-#endif >- wakeup(semakptr); >- for (i = 0; i < seminfo.semmni; i++) { >- if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && >- sema[i].u.sem_base > semakptr->u.sem_base) >- mtx_lock_flags(&sema_mtx[i], LOP_DUPOK); >- } >- for (i = semakptr->u.sem_base - 
sem; i < semtot; i++) >- sem[i] = sem[i + semakptr->u.sem_nsems]; >- for (i = 0; i < seminfo.semmni; i++) { >- if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && >- sema[i].u.sem_base > semakptr->u.sem_base) { >- sema[i].u.sem_base -= semakptr->u.sem_nsems; >- mtx_unlock(&sema_mtx[i]); >- } >- } >- semtot -= semakptr->u.sem_nsems; >+ sem_remove(semidx, cred); > break; > > case IPC_SET: >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M))) > goto done2; >@@ -681,15 +770,17 @@ > break; > > case IPC_STAT: >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) > goto done2; > bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds)); >+ if (cred->cr_prison != semakptr->cred->cr_prison) >+ arg->buf->sem_perm.key = IPC_PRIVATE; > break; > > case GETNCNT: >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) > goto done2; >@@ -701,7 +792,7 @@ > break; > > case GETPID: >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) > goto done2; >@@ -713,7 +804,7 @@ > break; > > case GETVAL: >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) > goto done2; >@@ -749,7 +840,7 @@ > mtx_unlock(sema_mtxp); > array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK); > mtx_lock(sema_mtxp); >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > KASSERT(count == semakptr->u.sem_nsems, ("nsems changed")); > if ((error = ipcperm(td, 
&semakptr->u.sem_perm, IPC_R))) >@@ -762,7 +853,7 @@ > break; > > case GETZCNT: >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) > goto done2; >@@ -774,7 +865,7 @@ > break; > > case SETVAL: >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W))) > goto done2; >@@ -805,7 +896,7 @@ > mtx_lock(sema_mtxp); > if (error) > break; >- if ((error = semvalid(semid, semakptr)) != 0) >+ if ((error = semvalid(semid, rpr, semakptr)) != 0) > goto done2; > KASSERT(count == semakptr->u.sem_nsems, ("nsems changed")); > if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W))) >@@ -855,7 +946,8 @@ > struct ucred *cred = td->td_ucred; > > DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg)); >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ >+ if (sem_find_prison(cred) == NULL) > return (ENOSYS); > > mtx_lock(&sem_mtx); >@@ -862,6 +954,8 @@ > if (key != IPC_PRIVATE) { > for (semid = 0; semid < seminfo.semmni; semid++) { > if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) && >+ sema[semid].cred != NULL && >+ sema[semid].cred->cr_prison == cred->cr_prison && > sema[semid].u.sem_perm.key == key) > break; > } >@@ -978,6 +1072,7 @@ > struct sembuf small_sops[SMALL_SOPS]; > int semid = uap->semid; > size_t nsops = uap->nsops; >+ struct prison *rpr; > struct sembuf *sops; > struct semid_kernel *semakptr; > struct sembuf *sopptr = NULL; >@@ -994,7 +1089,8 @@ > #endif > DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops)); > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ rpr = sem_find_prison(td->td_ucred); >+ if (sem == NULL) > return (ENOSYS); > > semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ >@@ -1044,6 +1140,8 @@ > error = EINVAL; > goto done2; > } >+ if ((error = sem_prison_cansee(rpr, semakptr)) != 0) >+ 
goto done2; > /* > * Initial pass thru sops to see what permissions are needed. > * Also perform any checks that don't need repeating on each >@@ -1367,11 +1465,217 @@ > static int > sysctl_sema(SYSCTL_HANDLER_ARGS) > { >+ struct prison *rpr; >+ struct sbuf sb; >+ struct semid_kernel tmp, empty; >+ struct semid_kernel *semakptr; >+ int error, i; > >- return (SYSCTL_OUT(req, sema, >- sizeof(struct semid_kernel) * seminfo.semmni)); >+ error = sysctl_wire_old_buffer(req, 0); >+ if (error != 0) >+ goto done; >+ rpr = sem_find_prison(req->td->td_ucred); >+ sbuf_new_for_sysctl(&sb, NULL, sizeof(struct semid_kernel) * >+ seminfo.semmni, req); >+ >+ bzero(&empty, sizeof(empty)); >+ for (i = 0; i < seminfo.semmni; i++) { >+ semakptr = &sema[i]; >+ if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || >+ rpr == NULL || sem_prison_cansee(rpr, semakptr) != 0) { >+ semakptr = &empty; >+ } else if (req->td->td_ucred->cr_prison != >+ semakptr->cred->cr_prison) { >+ bcopy(semakptr, &tmp, sizeof(tmp)); >+ semakptr = &tmp; >+ semakptr->u.sem_perm.key = IPC_PRIVATE; >+ } >+ >+ sbuf_bcat(&sb, semakptr, sizeof(*semakptr)); >+ } >+ error = sbuf_finish(&sb); >+ sbuf_delete(&sb); >+ >+done: >+ return (error); > } > >+static int >+sem_prison_check(void *obj, void *data) >+{ >+ struct prison *pr = obj; >+ struct prison *prpr; >+ struct vfsoptlist *opts = data; >+ int error, jsys; >+ >+ /* >+ * sysvsem is a jailsys integer. >+ * It must be "disable" if the parent jail is disabled. 
>+ */ >+ error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)); >+ if (error != ENOENT) { >+ if (error != 0) >+ return (error); >+ switch (jsys) { >+ case JAIL_SYS_DISABLE: >+ break; >+ case JAIL_SYS_NEW: >+ case JAIL_SYS_INHERIT: >+ prison_lock(pr->pr_parent); >+ prpr = osd_jail_get(pr->pr_parent, sem_prison_slot); >+ prison_unlock(pr->pr_parent); >+ if (prpr == NULL) >+ return (EPERM); >+ break; >+ default: >+ return (EINVAL); >+ } >+ } >+ >+ return (0); >+} >+ >+static int >+sem_prison_set(void *obj, void *data) >+{ >+ struct prison *pr = obj; >+ struct prison *tpr, *orpr, *nrpr, *trpr; >+ struct vfsoptlist *opts = data; >+ void *rsv; >+ int jsys, descend; >+ >+ /* >+ * sysvsem controls which jail is the root of the associated sems (this >+ * jail or same as the parent), or if the feature is available at all. >+ */ >+ if (vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)) == ENOENT) >+ jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) >+ ? JAIL_SYS_INHERIT >+ : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) >+ ? JAIL_SYS_DISABLE >+ : -1; >+ if (jsys == JAIL_SYS_DISABLE) { >+ prison_lock(pr); >+ orpr = osd_jail_get(pr, sem_prison_slot); >+ if (orpr != NULL) >+ osd_jail_del(pr, sem_prison_slot); >+ prison_unlock(pr); >+ if (orpr != NULL) { >+ if (orpr == pr) >+ sem_prison_cleanup(pr); >+ /* Disable all child jails as well. 
*/ >+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { >+ prison_lock(tpr); >+ trpr = osd_jail_get(tpr, sem_prison_slot); >+ if (trpr != NULL) { >+ osd_jail_del(tpr, sem_prison_slot); >+ prison_unlock(tpr); >+ if (trpr == tpr) >+ sem_prison_cleanup(tpr); >+ } else { >+ prison_unlock(tpr); >+ descend = 0; >+ } >+ } >+ } >+ } else if (jsys != -1) { >+ if (jsys == JAIL_SYS_NEW) >+ nrpr = pr; >+ else { >+ prison_lock(pr->pr_parent); >+ nrpr = osd_jail_get(pr->pr_parent, sem_prison_slot); >+ prison_unlock(pr->pr_parent); >+ } >+ rsv = osd_reserve(sem_prison_slot); >+ prison_lock(pr); >+ orpr = osd_jail_get(pr, sem_prison_slot); >+ if (orpr != nrpr) >+ (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv, >+ nrpr); >+ else >+ osd_free_reserved(rsv); >+ prison_unlock(pr); >+ if (orpr != nrpr) { >+ if (orpr == pr) >+ sem_prison_cleanup(pr); >+ if (orpr != NULL) { >+ /* Change child jails matching the old root, */ >+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { >+ prison_lock(tpr); >+ trpr = osd_jail_get(tpr, >+ sem_prison_slot); >+ if (trpr == orpr) { >+ (void)osd_jail_set(tpr, >+ sem_prison_slot, nrpr); >+ prison_unlock(tpr); >+ if (trpr == tpr) >+ sem_prison_cleanup(tpr); >+ } else { >+ prison_unlock(tpr); >+ descend = 0; >+ } >+ } >+ } >+ } >+ } >+ >+ return (0); >+} >+ >+static int >+sem_prison_get(void *obj, void *data) >+{ >+ struct prison *pr = obj; >+ struct prison *rpr; >+ struct vfsoptlist *opts = data; >+ int error, jsys; >+ >+ /* Set sysvsem based on the jail's root prison. */ >+ prison_lock(pr); >+ rpr = osd_jail_get(pr, sem_prison_slot); >+ prison_unlock(pr); >+ jsys = rpr == NULL ? JAIL_SYS_DISABLE >+ : rpr == pr ? 
JAIL_SYS_NEW : JAIL_SYS_INHERIT; >+ error = vfs_setopt(opts, "sysvsem", &jsys, sizeof(jsys)); >+ if (error == ENOENT) >+ error = 0; >+ return (error); >+} >+ >+static int >+sem_prison_remove(void *obj, void *data __unused) >+{ >+ struct prison *pr = obj; >+ struct prison *rpr; >+ >+ prison_lock(pr); >+ rpr = osd_jail_get(pr, sem_prison_slot); >+ prison_unlock(pr); >+ if (rpr == pr) >+ sem_prison_cleanup(pr); >+ return (0); >+} >+ >+static void >+sem_prison_cleanup(struct prison *pr) >+{ >+ int i; >+ >+ /* Remove any sems that belong to this jail. */ >+ mtx_lock(&sem_mtx); >+ for (i = 0; i < seminfo.semmni; i++) { >+ if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && >+ sema[i].cred != NULL && sema[i].cred->cr_prison == pr) { >+ mtx_lock(&sema_mtx[i]); >+ sem_remove(i, NULL); >+ mtx_unlock(&sema_mtx[i]); >+ } >+ } >+ mtx_unlock(&sem_mtx); >+} >+ >+SYSCTL_JAIL_PARAM_SYS_NODE(sysvsem, CTLFLAG_RW, "SYSV semaphores"); >+ > #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ > defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) > >@@ -1398,8 +1702,6 @@ > { > int error; > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >- return (ENOSYS); > if (uap->which < 0 || > uap->which >= sizeof(semcalls)/sizeof(semcalls[0])) > return (EINVAL); >Index: kern/sysv_shm.c >=================================================================== >--- kern/sysv_shm.c (revision 298132) >+++ kern/sysv_shm.c (working copy) >@@ -80,6 +80,7 @@ > #include <sys/racct.h> > #include <sys/resourcevar.h> > #include <sys/rwlock.h> >+#include <sys/sbuf.h> > #include <sys/stat.h> > #include <sys/syscall.h> > #include <sys/syscallsubr.h> >@@ -112,7 +113,8 @@ > > static int shm_last_free, shm_nused, shmalloced; > vm_size_t shm_committed; >-static struct shmid_kernel *shmsegs; >+static struct shmid_kernel *shmsegs; >+static unsigned shm_prison_slot; > > struct shmmap_state { > vm_offset_t va; >@@ -120,8 +122,8 @@ > }; > > static void shm_deallocate_segment(struct shmid_kernel *); >-static int 
shm_find_segment_by_key(key_t); >-static struct shmid_kernel *shm_find_segment(int, bool); >+static int shm_find_segment_by_key(struct prison *, key_t); >+static struct shmid_kernel *shm_find_segment(struct prison *, int, bool); > static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *); > static void shmrealloc(void); > static int shminit(void); >@@ -130,6 +132,14 @@ > static void shmexit_myhook(struct vmspace *vm); > static void shmfork_myhook(struct proc *p1, struct proc *p2); > static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS); >+static void shm_remove(struct shmid_kernel *, int); >+static struct prison *shm_find_prison(struct ucred *); >+static int shm_prison_cansee(struct prison *, struct shmid_kernel *); >+static int shm_prison_check(void *, void *); >+static int shm_prison_set(void *, void *); >+static int shm_prison_get(void *, void *); >+static int shm_prison_remove(void *, void *); >+static void shm_prison_cleanup(struct prison *); > > /* > * Tuneable values. >@@ -189,12 +199,14 @@ > #define SYSVSHM_ASSERT_LOCKED() sx_assert(&sysvshmsx, SA_XLOCKED) > > static int >-shm_find_segment_by_key(key_t key) >+shm_find_segment_by_key(struct prison *pr, key_t key) > { > int i; > > for (i = 0; i < shmalloced; i++) > if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) && >+ shmsegs[i].cred != NULL && >+ shmsegs[i].cred->cr_prison == pr && > shmsegs[i].u.shm_perm.key == key) > return (i); > return (-1); >@@ -205,7 +217,7 @@ > * is_shmid is false.
> */ > static struct shmid_kernel * >-shm_find_segment(int arg, bool is_shmid) >+shm_find_segment(struct prison *rpr, int arg, bool is_shmid) > { > struct shmid_kernel *shmseg; > int segnum; >@@ -217,7 +229,8 @@ > if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 || > (!shm_allow_removed && > (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) || >- (is_shmid && shmseg->u.shm_perm.seq != IPCID_TO_SEQ(arg))) >+ (is_shmid && shmseg->u.shm_perm.seq != IPCID_TO_SEQ(arg)) || >+ shm_prison_cansee(rpr, shmseg) != 0) /* nonzero: segment hidden from rpr */ > return (NULL); > return (shmseg); > } >@@ -271,7 +284,42 @@ > return (0); > } > >+static void >+shm_remove(struct shmid_kernel *shmseg, int segnum) >+{ >+ >+ shmseg->u.shm_perm.key = IPC_PRIVATE; >+ shmseg->u.shm_perm.mode |= SHMSEG_REMOVED; >+ if (shmseg->u.shm_nattch <= 0) { >+ shm_deallocate_segment(shmseg); >+ shm_last_free = segnum; >+ } >+} >+ >+static struct prison * >+shm_find_prison(struct ucred *cred) >+{ >+ struct prison *pr, *rpr; >+ >+ pr = cred->cr_prison; >+ prison_lock(pr); >+ rpr = osd_jail_get(pr, shm_prison_slot); >+ prison_unlock(pr); >+ return (rpr); >+} >+ > static int >+shm_prison_cansee(struct prison *rpr, struct shmid_kernel *shmseg) >+{ >+ >+ if (shmseg->cred == NULL || >+ !(rpr == shmseg->cred->cr_prison || >+ prison_ischild(rpr, shmseg->cred->cr_prison))) >+ return (EINVAL); >+ return (0); >+} >+ >+static int > kern_shmdt_locked(struct thread *td, const void *shmaddr) > { > struct proc *p = td->td_proc; >@@ -283,7 +331,7 @@ > int i; > > SYSVSHM_ASSERT_LOCKED(); >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ if (shm_find_prison(td->td_ucred) == NULL) > return (ENOSYS); > shmmap_s = p->p_vmspace->vm_shm; > if (shmmap_s == NULL) >@@ -325,6 +373,7 @@ > kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr, > int shmflg) > { >+ struct prison *rpr; > struct proc *p = td->td_proc; > struct shmid_kernel *shmseg; > struct shmmap_state *shmmap_s; >@@ -334,7 +383,8 @@ > int error, i, rv; > > SYSVSHM_ASSERT_LOCKED(); >- if
(!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ rpr = shm_find_prison(td->td_ucred); >+ if (rpr == NULL) > return (ENOSYS); > shmmap_s = p->p_vmspace->vm_shm; > if (shmmap_s == NULL) { >@@ -345,7 +395,7 @@ > KASSERT(p->p_vmspace->vm_shm == NULL, ("raced")); > p->p_vmspace->vm_shm = shmmap_s; > } >- shmseg = shm_find_segment(shmid, true); >+ shmseg = shm_find_segment(rpr, shmid, true); > if (shmseg == NULL) > return (EINVAL); > error = ipcperm(td, &shmseg->u.shm_perm, >@@ -431,6 +481,7 @@ > kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf, > size_t *bufsz) > { >+ struct prison *rpr; > struct shmid_kernel *shmseg; > struct shmid_ds *shmidp; > struct shm_info shm_info; >@@ -438,7 +489,8 @@ > > SYSVSHM_ASSERT_LOCKED(); > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ rpr = shm_find_prison(td->td_ucred); >+ if (rpr == NULL) > return (ENOSYS); > > switch (cmd) { >@@ -471,7 +523,7 @@ > return (0); > } > } >- shmseg = shm_find_segment(shmid, cmd != SHM_STAT); >+ shmseg = shm_find_segment(rpr, shmid, cmd != SHM_STAT); > if (shmseg == NULL) > return (EINVAL); > #ifdef MAC >@@ -482,10 +534,13 @@ > switch (cmd) { > case SHM_STAT: > case IPC_STAT: >+ shmidp = (struct shmid_ds *)buf; > error = ipcperm(td, &shmseg->u.shm_perm, IPC_R); > if (error != 0) > return (error); >- memcpy(buf, &shmseg->u, sizeof(struct shmid_ds)); >+ memcpy(shmidp, &shmseg->u, sizeof(struct shmid_ds)); >+ if (td->td_ucred->cr_prison != shmseg->cred->cr_prison) >+ shmidp->shm_perm.key = IPC_PRIVATE; > if (bufsz != NULL) > *bufsz = sizeof(struct shmid_ds); > if (cmd == SHM_STAT) { >@@ -509,12 +564,7 @@ > error = ipcperm(td, &shmseg->u.shm_perm, IPC_M); > if (error != 0) > return (error); >- shmseg->u.shm_perm.key = IPC_PRIVATE; >- shmseg->u.shm_perm.mode |= SHMSEG_REMOVED; >- if (shmseg->u.shm_nattch <= 0) { >- shm_deallocate_segment(shmseg); >- shm_last_free = IPCID_TO_IX(shmid); >- } >+ shm_remove(shmseg, IPCID_TO_IX(shmid)); > break; > #if 0 > case SHM_LOCK: >@@ -721,7 
+771,7 @@ > int segnum, mode; > int error; > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ if (shm_find_prison(td->td_ucred) == NULL) > return (ENOSYS); > mode = uap->shmflg & ACCESSPERMS; > SYSVSHM_LOCK(); >@@ -728,7 +778,8 @@ > if (uap->key == IPC_PRIVATE) { > error = shmget_allocate_segment(td, uap, mode); > } else { >- segnum = shm_find_segment_by_key(uap->key); >+ segnum = shm_find_segment_by_key(td->td_ucred->cr_prison, >+ uap->key); > if (segnum >= 0) > error = shmget_existing(td, uap, mode, segnum); > else if ((uap->shmflg & IPC_CREAT) == 0) >@@ -849,7 +900,15 @@ > static int > shminit(void) > { >+ struct prison *pr; >+ void *rsv; > int i, error; >+ osd_method_t methods[PR_MAXMETHOD] = { >+ [PR_METHOD_CHECK] = shm_prison_check, >+ [PR_METHOD_SET] = shm_prison_set, >+ [PR_METHOD_GET] = shm_prison_get, >+ [PR_METHOD_REMOVE] = shm_prison_remove, >+ }; > > #ifndef BURN_BRIDGES > if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0) >@@ -879,6 +938,29 @@ > shmexit_hook = &shmexit_myhook; > shmfork_hook = &shmfork_myhook; > >+ /* Set current prisons according to their allow.sysvipc. 
*/ >+ shm_prison_slot = osd_jail_register(NULL, methods); >+ rsv = osd_reserve(shm_prison_slot); >+ prison_lock(&prison0); >+ (void)osd_jail_set_reserved(&prison0, shm_prison_slot, rsv, &prison0); >+ prison_unlock(&prison0); >+ rsv = NULL; >+ sx_slock(&allprison_lock); >+ TAILQ_FOREACH(pr, &allprison, pr_list) { >+ if (rsv == NULL) >+ rsv = osd_reserve(shm_prison_slot); >+ prison_lock(pr); >+ if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { >+ (void)osd_jail_set_reserved(pr, shm_prison_slot, rsv, >+ &prison0); >+ rsv = NULL; >+ } >+ prison_unlock(pr); >+ } >+ if (rsv != NULL) >+ osd_free_reserved(rsv); >+ sx_sunlock(&allprison_lock); >+ > error = syscall_helper_register(shm_syscalls, SY_THR_STATIC_KLD); > if (error != 0) > return (error); >@@ -902,6 +984,8 @@ > syscall32_helper_unregister(shm32_syscalls); > #endif > syscall_helper_unregister(shm_syscalls); >+ if (shm_prison_slot != 0) >+ osd_jail_deregister(shm_prison_slot); > > for (i = 0; i < shmalloced; i++) { > #ifdef MAC >@@ -925,14 +1009,221 @@ > static int > sysctl_shmsegs(SYSCTL_HANDLER_ARGS) > { >- int error; >+ struct prison *rpr; >+ struct sbuf sb; >+ struct shmid_kernel tmp, empty; >+ struct shmid_kernel *shmseg; >+ int error, i; > > SYSVSHM_LOCK(); >- error = SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])); >+ >+ error = sysctl_wire_old_buffer(req, 0); >+ if (error != 0) >+ goto done; >+ rpr = shm_find_prison(req->td->td_ucred); >+ sbuf_new_for_sysctl(&sb, NULL, shmalloced * sizeof(shmsegs[0]), req); >+ >+ bzero(&empty, sizeof(empty)); >+ empty.u.shm_perm.mode = SHMSEG_FREE; >+ for (i = 0; i < shmalloced; i++) { >+ shmseg = &shmsegs[i]; >+ if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 || >+ rpr == NULL || shm_prison_cansee(rpr, &shmsegs[i]) != 0) { >+ shmseg = &empty; >+ } else if (req->td->td_ucred->cr_prison != >+ shmseg->cred->cr_prison) { >+ bcopy(shmseg, &tmp, sizeof(tmp)); >+ shmseg = &tmp; >+ shmseg->u.shm_perm.key = IPC_PRIVATE; >+ } >+ >+ sbuf_bcat(&sb, shmseg,
sizeof(*shmseg)); >+ } >+ error = sbuf_finish(&sb); >+ sbuf_delete(&sb); >+ >+done: > SYSVSHM_UNLOCK(); > return (error); > } > >+static int >+shm_prison_check(void *obj, void *data) >+{ >+ struct prison *pr = obj; >+ struct prison *prpr; >+ struct vfsoptlist *opts = data; >+ int error, jsys; >+ >+ /* >+ * sysvshm is a jailsys integer. >+ * It must be "disable" if the parent jail is disabled. >+ */ >+ error = vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys)); >+ if (error != ENOENT) { >+ if (error != 0) >+ return (error); >+ switch (jsys) { >+ case JAIL_SYS_DISABLE: >+ break; >+ case JAIL_SYS_NEW: >+ case JAIL_SYS_INHERIT: >+ prison_lock(pr->pr_parent); >+ prpr = osd_jail_get(pr->pr_parent, shm_prison_slot); >+ prison_unlock(pr->pr_parent); >+ if (prpr == NULL) >+ return (EPERM); >+ break; >+ default: >+ return (EINVAL); >+ } >+ } >+ >+ return (0); >+} >+ >+static int >+shm_prison_set(void *obj, void *data) >+{ >+ struct prison *pr = obj; >+ struct prison *tpr, *orpr, *nrpr, *trpr; >+ struct vfsoptlist *opts = data; >+ void *rsv; >+ int jsys, descend; >+ >+ /* >+ * sysvshm controls which jail is the root of the associated segments >+ * (this jail or same as the parent), or if the feature is available >+ * at all. >+ */ >+ if (vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys)) == ENOENT) >+ jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) >+ ? JAIL_SYS_INHERIT >+ : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) >+ ? JAIL_SYS_DISABLE >+ : -1; >+ if (jsys == JAIL_SYS_DISABLE) { >+ prison_lock(pr); >+ orpr = osd_jail_get(pr, shm_prison_slot); >+ if (orpr != NULL) >+ osd_jail_del(pr, shm_prison_slot); >+ prison_unlock(pr); >+ if (orpr != NULL) { >+ if (orpr == pr) >+ shm_prison_cleanup(pr); >+ /* Disable all child jails as well. 
*/ >+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { >+ prison_lock(tpr); >+ trpr = osd_jail_get(tpr, shm_prison_slot); >+ if (trpr != NULL) { >+ osd_jail_del(tpr, shm_prison_slot); >+ prison_unlock(tpr); >+ if (trpr == tpr) >+ shm_prison_cleanup(tpr); >+ } else { >+ prison_unlock(tpr); >+ descend = 0; >+ } >+ } >+ } >+ } else if (jsys != -1) { >+ if (jsys == JAIL_SYS_NEW) >+ nrpr = pr; >+ else { >+ prison_lock(pr->pr_parent); >+ nrpr = osd_jail_get(pr->pr_parent, shm_prison_slot); >+ prison_unlock(pr->pr_parent); >+ } >+ rsv = osd_reserve(shm_prison_slot); >+ prison_lock(pr); >+ orpr = osd_jail_get(pr, shm_prison_slot); >+ if (orpr != nrpr) >+ (void)osd_jail_set_reserved(pr, shm_prison_slot, rsv, >+ nrpr); >+ else >+ osd_free_reserved(rsv); >+ prison_unlock(pr); >+ if (orpr != nrpr) { >+ if (orpr == pr) >+ shm_prison_cleanup(pr); >+ if (orpr != NULL) { >+ /* Change child jails matching the old root, */ >+ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { >+ prison_lock(tpr); >+ trpr = osd_jail_get(tpr, >+ shm_prison_slot); >+ if (trpr == orpr) { >+ (void)osd_jail_set(tpr, >+ shm_prison_slot, nrpr); >+ prison_unlock(tpr); >+ if (trpr == tpr) >+ shm_prison_cleanup(tpr); >+ } else { >+ prison_unlock(tpr); >+ descend = 0; >+ } >+ } >+ } >+ } >+ } >+ >+ return (0); >+} >+ >+static int >+shm_prison_get(void *obj, void *data) >+{ >+ struct prison *pr = obj; >+ struct prison *rpr; >+ struct vfsoptlist *opts = data; >+ int error, jsys; >+ >+ /* Set sysvshm based on the jail's root prison. */ >+ prison_lock(pr); >+ rpr = osd_jail_get(pr, shm_prison_slot); >+ prison_unlock(pr); >+ jsys = rpr == NULL ? JAIL_SYS_DISABLE >+ : rpr == pr ? 
JAIL_SYS_NEW : JAIL_SYS_INHERIT; >+ error = vfs_setopt(opts, "sysvshm", &jsys, sizeof(jsys)); >+ if (error == ENOENT) >+ error = 0; >+ return (error); >+} >+ >+static int >+shm_prison_remove(void *obj, void *data __unused) >+{ >+ struct prison *pr = obj; >+ struct prison *rpr; >+ >+ SYSVSHM_LOCK(); >+ prison_lock(pr); >+ rpr = osd_jail_get(pr, shm_prison_slot); >+ prison_unlock(pr); >+ if (rpr == pr) >+ shm_prison_cleanup(pr); >+ SYSVSHM_UNLOCK(); >+ return (0); >+} >+ >+static void >+shm_prison_cleanup(struct prison *pr) >+{ >+ struct shmid_kernel *shmseg; >+ int i; >+ >+ /* Remove any segments that belong to this jail. */ >+ for (i = 0; i < shmalloced; i++) { >+ shmseg = &shmsegs[i]; >+ if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) && >+ shmseg->cred != NULL && shmseg->cred->cr_prison == pr) { >+ shm_remove(shmseg, i); >+ } >+ } >+} >+ >+SYSCTL_JAIL_PARAM_SYS_NODE(sysvshm, CTLFLAG_RW, "SYSV shared memory"); >+ > #if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43)) > struct oshmid_ds { > struct ipc_perm_old shm_perm; /* operation perms */ >@@ -957,10 +1248,12 @@ > { > #ifdef COMPAT_43 > int error = 0; >+ struct prison *rpr; > struct shmid_kernel *shmseg; > struct oshmid_ds outbuf; > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >+ rpr = shm_find_prison(td->td_ucred); >+ if (rpr == NULL) > return (ENOSYS); > if (uap->cmd != IPC_STAT) { > return (freebsd7_shmctl(td, >@@ -967,7 +1260,7 @@ > (struct freebsd7_shmctl_args *)uap)); > } > SYSVSHM_LOCK(); >- shmseg = shm_find_segment(uap->shmid, true); >+ shmseg = shm_find_segment(rpr, uap->shmid, true); > if (shmseg == NULL) { > SYSVSHM_UNLOCK(); > return (EINVAL); >@@ -1020,8 +1313,6 @@ > sys_shmsys(struct thread *td, struct shmsys_args *uap) > { > >- if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC)) >- return (ENOSYS); > if (uap->which < 0 || uap->which >= nitems(shmcalls)) > return (EINVAL); > return ((*shmcalls[uap->which])(td, &uap->a2));
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 48471
:
28925
|
28926
|
157658
|
157661
|
169450
|
169452
| 169480