--- sys/kern/kern_clock.c 2019-11-01 01:01:41.000000000 +0100 +++ sys/kern/kern_clock.c 2020-06-06 03:07:00.280026000 +0200 @@ -94,89 +94,130 @@ /* Spin-lock protecting profiling statistics. */ static struct mtx time_lock; SDT_PROVIDER_DECLARE(sched); SDT_PROBE_DEFINE2(sched, , , tick, "struct thread *", "struct proc *"); +/* forward decl */ +static void _read_cpu_time(long *cp_time, bool ext); + +/* Writes array of CPUSTATES (_EXT) + * common body for sysctl_kern_cp_time() and sysctl_kern_cp_time_ext() + */ static int -sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) +_sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS, bool ext) { int error; - long cp_time[CPUSTATES]; + size_t cpustates = ext? CPUSTATES_EXT: CPUSTATES; + long cp_time[cpustates]; #ifdef SCTL_MASK32 int i; - unsigned int cp_time32[CPUSTATES]; + unsigned int cp_time32[cpustates]; #endif - read_cpu_time(cp_time); + _read_cpu_time(cp_time, ext); #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time32)); - for (i = 0; i < CPUSTATES; i++) + for (i = 0; i < cpustates; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time)); error = SYSCTL_OUT(req, cp_time, sizeof(cp_time)); } return error; } +static int +sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) +{ + return _sysctl_kern_cp_time(SYSCTL_HANDLER_VARS, false); +} + +static int +sysctl_kern_cp_time_ext(SYSCTL_HANDLER_ARGS) +{ + return _sysctl_kern_cp_time(SYSCTL_HANDLER_VARS, true); +} + SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, - 0,0, sysctl_kern_cp_time, "LU", "CPU time statistics"); + 0,0, sysctl_kern_cp_time, "LU", "CPU time statistics (user/nice/sys/intr/idle [1/stathz]; cumulated)");
+SYSCTL_PROC(_kern, OID_AUTO, cp_time_ext, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, + 0,0, sysctl_kern_cp_time_ext, "LU", "extra CPU time statistics (fifo/realtime/user_idle/nice_fifo/nice_rt/nice_idle)\n" + "\tNOTE: fractions of kern.cp_time[user/nice] (absolute -- not a quotient)"); + static long empty[CPUSTATES]; +static long empty_x[CPUSTATES_EXT]; +/* common body for sysctl_kern_cp_times() and sysctl_kern_cp_times_ext() */ static int -sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) +_sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS, bool ext) { struct pcpu *pcpu; int error; int c; long *cp_time; + size_t cpustates = ext? CPUSTATES_EXT: CPUSTATES; #ifdef SCTL_MASK32 - unsigned int cp_time32[CPUSTATES]; + unsigned int cp_time32[cpustates]; int i; #endif if (!req->oldptr) { #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) return SYSCTL_OUT(req, 0, sizeof(cp_time32) * (mp_maxid + 1)); else #endif - return SYSCTL_OUT(req, 0, sizeof(long) * CPUSTATES * (mp_maxid + 1)); + return SYSCTL_OUT(req, 0, sizeof(long) * cpustates * (mp_maxid + 1)); } for (error = 0, c = 0; error == 0 && c <= mp_maxid; c++) { if (!CPU_ABSENT(c)) { pcpu = pcpu_find(c); - cp_time = pcpu->pc_cp_time; + cp_time = ext? pcpu->pc_cp_time_ext: pcpu->pc_cp_time; } else { - cp_time = empty; + cp_time = ext? 
empty_x: empty; } #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { - for (i = 0; i < CPUSTATES; i++) + for (i = 0; i < cpustates; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif - error = SYSCTL_OUT(req, cp_time, sizeof(long) * CPUSTATES); + error = SYSCTL_OUT(req, cp_time, sizeof(long) * cpustates); } return error; } +static int +sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) +{ + return _sysctl_kern_cp_times(SYSCTL_HANDLER_VARS, false); +} + +static int +sysctl_kern_cp_times_ext(SYSCTL_HANDLER_ARGS) +{ + return _sysctl_kern_cp_times(SYSCTL_HANDLER_VARS, true); +} + SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, - 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); + 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics (kern.cp_time[0] ...)"); +SYSCTL_PROC(_kern, OID_AUTO, cp_times_ext, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, + 0,0, sysctl_kern_cp_times_ext, "LU", "extra per-CPU time statistics (kern.cp_time_ext[0] ...)"); + #ifdef DEADLKRES static const char *blessed[] = { "getblk", "so_snd_sx", "so_rcv_sx", NULL }; @@ -312,27 +353,41 @@ SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW, &blktime_threshold, 0, "Number of seconds within is valid to block on a turnstile"); SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0, "Number of seconds between any deadlock resolver thread run"); #endif /* DEADLKRES */ +/* common body for read_cpu_time() and read_cpu_time_ext() */ +static void +_read_cpu_time(long *cp_time, bool ext) +{ + struct pcpu *pc; + size_t cpustates = (ext? CPUSTATES_EXT: CPUSTATES); + int i, j; + + /* Sum up global cp_time[]. */ + bzero(cp_time, sizeof(long) * cpustates); + CPU_FOREACH(i) { + pc = pcpu_find(i); + for (j = 0; j < cpustates; j++) + cp_time[j] += (ext? 
pc->pc_cp_time_ext[j]: pc->pc_cp_time[j]); + } +} + void read_cpu_time(long *cp_time) { - struct pcpu *pc; - int i, j; + _read_cpu_time(cp_time, false); +} - /* Sum up global cp_time[]. */ - bzero(cp_time, sizeof(long) * CPUSTATES); - CPU_FOREACH(i) { - pc = pcpu_find(i); - for (j = 0; j < CPUSTATES; j++) - cp_time[j] += pc->pc_cp_time[j]; - } +void +read_cpu_time_ext(long *cp_time) +{ + _read_cpu_time(cp_time, true); } #include static int watchdog_ticks; static int watchdog_enabled; static void watchdog_fire(void); @@ -626,30 +681,53 @@ statclock(int cnt, int usermode) { struct rusage *ru; struct vmspace *vm; struct thread *td; struct proc *p; long rss; - long *cp_time; + long *cp_time, *cp_time_x; uint64_t runtime, new_switchtime; td = curthread; p = td->td_proc; cp_time = (long *)PCPU_PTR(cp_time); + cp_time_x = (long *)PCPU_PTR(cp_time_ext); if (usermode) { /* * Charge the time as appropriate. */ td->td_uticks += cnt; - if (p->p_nice > NZERO) + if (p->p_nice > NZERO) { cp_time[CP_NICE] += cnt; - else + + /* extra CPU statistics */ + switch (td->td_pri_class) { + case PRI_FIFO: + cp_time_x[CPX_NICE_FIFO] += cnt; break; + case PRI_REALTIME: + cp_time_x[CPX_NICE_RT] += cnt; break; + case PRI_IDLE: + cp_time_x[CPX_NICE_IDLE] += cnt; break; + } + } else { cp_time[CP_USER] += cnt; + + /* extra CPU statistics */ + switch (td->td_pri_class) { + case PRI_FIFO: + cp_time_x[CPX_USER_FIFO] += cnt; break; + case PRI_REALTIME: + cp_time_x[CPX_USER_RT] += cnt; break; + case PRI_IDLE: + cp_time_x[CPX_USER_IDLE] += cnt; break; + } + } + /* "normal" USER_TIMESHARE = cp_time[CP_USER] - (sum(cp_time_x[USER_FIFO,USER_RT,USER_IDLE])) */ } else { /* * Came from kernel mode, so we were: * - handling an interrupt, * - doing syscall or trap work on behalf of the current * user process, or * - spinning in the idle loop. 
--- sys/sys/pcpu.h 2019-11-01 01:01:48.000000000 +0100 +++ sys/sys/pcpu.h 2020-06-05 11:02:05.067900000 +0200 @@ -180,14 +180,15 @@ struct pcb *pc_curpcb; /* Current pcb */ uint64_t pc_switchtime; /* cpu_ticks() at last csw */ int pc_switchticks; /* `ticks' at last csw */ u_int pc_cpuid; /* This cpu number */ STAILQ_ENTRY(pcpu) pc_allcpu; struct lock_list_entry *pc_spinlocks; long pc_cp_time[CPUSTATES]; /* statclock ticks */ + long pc_cp_time_ext[CPUSTATES_EXT]; /* statclock ticks */ struct device *pc_device; void *pc_netisr; /* netisr SWI cookie */ int pc_unused1; /* unused field */ int pc_domain; /* Memory domain. */ struct rm_queue pc_rm_queue; /* rmlock list of trackers */ uintptr_t pc_dynamic; /* Dynamic per-cpu data area */ uint64_t pc_early_dummy_counter; /* Startup time counter(9) */ --- sys/sys/resource.h 2019-11-01 01:01:48.000000000 +0100 +++ sys/sys/resource.h 2020-06-05 10:26:18.556398000 +0200 @@ -168,20 +168,32 @@ #define CP_USER 0 #define CP_NICE 1 #define CP_SYS 2 #define CP_INTR 3 #define CP_IDLE 4 #define CPUSTATES 5 +/* These are exposed via sysctls cp_time(s)_ext: + * NOTE all are (absolute) fractions of CP_USER or CP_NICE, resp. 
(not as a quotient) + */ +#define CPX_USER_FIFO 0 /* fifo+realtime (libthr(3), rt_prio(8)) */ +#define CPX_USER_RT 1 /* (soft) realtime (rt_prio(8)) */ +#define CPX_USER_IDLE 2 /* (user) idle (idprio(8)) */ +#define CPX_NICE_FIFO 3 /* nice fifo+realtime */ +#define CPX_NICE_RT 4 /* nice (soft) realtime */ +#define CPX_NICE_IDLE 5 /* nice (user) idle */ +#define CPUSTATES_EXT 6 /* extra/refined CPU states */ + #endif /* __BSD_VISIBLE */ #ifdef _KERNEL extern struct loadavg averunnable; void read_cpu_time(long *cp_time); /* Writes array of CPUSTATES */ +void read_cpu_time_ext(long *cp_time); /* Writes array of CPUSTATES_EXT */ #else __BEGIN_DECLS /* XXX 2nd arg to [gs]etpriority() should be an id_t */ int getpriority(int, int); int getrlimit(int, struct rlimit *); --- sys/sys/sysctl.h 2019-11-01 01:01:48.000000000 +0100 +++ sys/sys/sysctl.h 2020-06-05 04:33:37.362328000 +0200 @@ -139,14 +139,15 @@ #define SYSCTL_CT_ASSERT_MASK CTLTYPE #else #define SYSCTL_CT_ASSERT_MASK 0 #endif #define SYSCTL_HANDLER_ARGS struct sysctl_oid *oidp, void *arg1, \ intmax_t arg2, struct sysctl_req *req +#define SYSCTL_HANDLER_VARS oidp, arg1, arg2, req /* definitions for sysctl_req 'lock' member */ #define REQ_UNWIRED 1 #define REQ_WIRED 2 /* definitions for sysctl_req 'flags' member */ #if defined(__aarch64__) || defined(__amd64__) || defined(__powerpc64__) ||\