FreeBSD Bugzilla – Attachment 235195 Details for Bug 264867
kevent EVFILT_TIMER, timer expiration with twice the period
[patch] 13.1 patch
diff (text/plain), 29.92 KB, created by Mark Johnston on 2022-07-11 20:15:40 UTC
Flags: patch, obsolete
>diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c >index 48e06ee082fc..89d19bca9317 100644 >--- a/sys/kern/kern_clocksource.c >+++ b/sys/kern/kern_clocksource.c >@@ -65,8 +65,9 @@ static int doconfigtimer(void); > static void configtimer(int start); > static int round_freq(struct eventtimer *et, int freq); > >-static sbintime_t getnextcpuevent(int idle); >-static sbintime_t getnextevent(void); >+struct pcpu_state; >+static sbintime_t getnextcpuevent(struct pcpu_state *state, int idle); >+static sbintime_t getnextevent(struct pcpu_state *state); > static int handleevents(sbintime_t now, int fake); > > static struct mtx et_hw_mtx; >@@ -213,8 +214,8 @@ handleevents(sbintime_t now, int fake) > callout_process(now); > } > >- t = getnextcpuevent(0); > ET_HW_LOCK(state); >+ t = getnextcpuevent(state, 0); > if (!busy) { > state->idle = 0; > state->nextevent = t; >@@ -229,13 +230,11 @@ handleevents(sbintime_t now, int fake) > * Schedule binuptime of the next event on current CPU. > */ > static sbintime_t >-getnextcpuevent(int idle) >+getnextcpuevent(struct pcpu_state *state, int idle) > { > sbintime_t event; >- struct pcpu_state *state; > u_int hardfreq; > >- state = DPCPU_PTR(timerstate); > /* Handle hardclock() events, skipping some if CPU is idle. */ > event = state->nexthard; > if (idle) { >@@ -266,9 +265,8 @@ getnextcpuevent(int idle) > * Schedule binuptime of the next event on all CPUs. > */ > static sbintime_t >-getnextevent(void) >+getnextevent(struct pcpu_state *state) > { >- struct pcpu_state *state; > sbintime_t event; > #ifdef SMP > int cpu; >@@ -278,7 +276,6 @@ getnextevent(void) > > c = -1; > #endif >- state = DPCPU_PTR(timerstate); > event = state->nextevent; > #ifdef SMP > if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) { >@@ -385,10 +382,10 @@ loadtimer(sbintime_t now, int start) > uint64_t tmp; > int eq; > >- if (timer->et_flags & ET_FLAGS_PERCPU) { >- state = DPCPU_PTR(timerstate); >+ state = DPCPU_PTR(timerstate); >+ if (timer->et_flags & ET_FLAGS_PERCPU) > next = &state->nexttick; >- } else >+ else > next = &nexttick; > if (periodic) { > if (start) { >@@ -407,7 +404,7 @@ loadtimer(sbintime_t now, int start) > et_start(timer, new, timerperiod); > } > } else { >- new = getnextevent(); >+ new = getnextevent(state); > eq = (new == *next); > CTR4(KTR_SPARE2, "load at %d: next %d.%08x eq %d", > curcpu, (int)(new >> 32), (u_int)(new & 0xffffffff), eq); >@@ -681,14 +678,12 @@ cpu_initclocks_bsp(void) > void > cpu_initclocks_ap(void) > { >- sbintime_t now; > struct pcpu_state *state; > struct thread *td; > > state = DPCPU_PTR(timerstate); >- now = sbinuptime(); > ET_HW_LOCK(state); >- state->now = now; >+ state->now = sbinuptime(); > hardclock_sync(curcpu); > spinlock_enter(); > ET_HW_UNLOCK(state); >@@ -772,14 +767,14 @@ cpu_idleclock(void) > ) > return (-1); > state = DPCPU_PTR(timerstate); >+ ET_HW_LOCK(state); > if (periodic) > now = state->now; > else > now = sbinuptime(); > CTR3(KTR_SPARE2, "idle at %d: now %d.%08x", > curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); >- t = getnextcpuevent(1); >- ET_HW_LOCK(state); >+ t = getnextcpuevent(state, 1); > state->idle = 1; > state->nextevent = t; > if (!periodic) >@@ -799,15 +794,15 @@ cpu_activeclock(void) > struct thread *td; > > state = DPCPU_PTR(timerstate); >- if (state->idle == 0 || busy) >+ if (atomic_load_int(&state->idle) == 0 || busy) > return; >+ spinlock_enter(); > if (periodic) > now = state->now; > else > now = sbinuptime(); > CTR3(KTR_SPARE2, "active at %d: now %d.%08x", > curcpu, (int)(now >> 
32), (u_int)(now & 0xffffffff)); >- spinlock_enter(); > td = curthread; > td->td_intr_nesting_level++; > handleevents(now, 1); >diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c >index 1f859b286843..d851d2f91d49 100644 >--- a/sys/kern/sched_ule.c >+++ b/sys/kern/sched_ule.c >@@ -226,9 +226,16 @@ static int __read_mostly sched_idlespins = 10000; > static int __read_mostly sched_idlespinthresh = -1; > > /* >- * tdq - per processor runqs and statistics. All fields are protected by the >- * tdq_lock. The load and lowpri may be accessed without to avoid excess >- * locking in sched_pickcpu(); >+ * tdq - per processor runqs and statistics. A mutex synchronizes access to >+ * most fields. Some fields are loaded or modified without the mutex. >+ * >+ * Locking protocols: >+ * (c) constant after initialization >+ * (f) flag, set with the tdq lock held, cleared on local CPU >+ * (l) all accesses are CPU-local >+ * (ls) stores are performed by the local CPU, loads may be lockless >+ * (t) all accesses are protected by the tdq mutex >+ * (ts) stores are serialized by the tdq mutex, loads may be lockless > */ > struct tdq { > /* >@@ -236,32 +243,41 @@ struct tdq { > * tdq_lock is padded to avoid false sharing with tdq_load and > * tdq_cpu_idle. > */ >- struct mtx_padalign tdq_lock; /* run queue lock. */ >- struct cpu_group *tdq_cg; /* Pointer to cpu topology. */ >- volatile int tdq_load; /* Aggregate load. */ >- volatile int tdq_cpu_idle; /* cpu_idle() is active. */ >- int tdq_sysload; /* For loadavg, !ITHD load. */ >- volatile int tdq_transferable; /* Transferable thread count. */ >- volatile short tdq_switchcnt; /* Switches this tick. */ >- volatile short tdq_oldswitchcnt; /* Switches last tick. */ >- u_char tdq_lowpri; /* Lowest priority thread. */ >- u_char tdq_owepreempt; /* Remote preemption pending. */ >- u_char tdq_idx; /* Current insert index. */ >- u_char tdq_ridx; /* Current removal index. */ >- int tdq_id; /* cpuid. */ >- struct runq tdq_realtime; /* real-time run queue. */ >- struct runq tdq_timeshare; /* timeshare run queue. */ >- struct runq tdq_idle; /* Queue of IDLE threads. */ >+ struct mtx_padalign tdq_lock; /* run queue lock. */ >+ struct cpu_group *tdq_cg; /* (c) Pointer to cpu topology. */ >+ struct thread *tdq_curthread; /* (t) Current executing thread. */ >+ int tdq_load; /* (ts) Aggregate load. */ >+ int tdq_sysload; /* (ts) For loadavg, !ITHD load. */ >+ int tdq_cpu_idle; /* (ls) cpu_idle() is active. */ >+ int tdq_transferable; /* (ts) Transferable thread count. */ >+ short tdq_switchcnt; /* (l) Switches this tick. */ >+ short tdq_oldswitchcnt; /* (l) Switches last tick. */ >+ u_char tdq_lowpri; /* (ts) Lowest priority thread. */ >+ u_char tdq_owepreempt; /* (f) Remote preemption pending. */ >+ u_char tdq_idx; /* (t) Current insert index. */ >+ u_char tdq_ridx; /* (t) Current removal index. */ >+ int tdq_id; /* (c) cpuid. */ >+ struct runq tdq_realtime; /* (t) real-time run queue. */ >+ struct runq tdq_timeshare; /* (t) timeshare run queue. */ >+ struct runq tdq_idle; /* (t) Queue of IDLE threads. */ > char tdq_name[TDQ_NAME_LEN]; > #ifdef KTR > char tdq_loadname[TDQ_LOADNAME_LEN]; > #endif >-} __aligned(64); >+}; > > /* Idle thread states and config. */ > #define TDQ_RUNNING 1 > #define TDQ_IDLE 2 > >+/* Lockless accessors. 
*/ >+#define TDQ_LOAD(tdq) atomic_load_int(&(tdq)->tdq_load) >+#define TDQ_TRANSFERABLE(tdq) atomic_load_int(&(tdq)->tdq_transferable) >+#define TDQ_SWITCHCNT(tdq) (atomic_load_short(&(tdq)->tdq_switchcnt) + \ >+ atomic_load_short(&(tdq)->tdq_oldswitchcnt)) >+#define TDQ_SWITCHCNT_INC(tdq) (atomic_store_short(&(tdq)->tdq_switchcnt, \ >+ atomic_load_short(&(tdq)->tdq_switchcnt) + 1)) >+ > #ifdef SMP > struct cpu_group __read_mostly *cpu_top; /* CPU topology */ > >@@ -306,6 +322,7 @@ static struct tdq tdq_cpu; > #define TDQ_UNLOCK(t) mtx_unlock_spin(TDQ_LOCKPTR((t))) > #define TDQ_LOCKPTR(t) ((struct mtx *)(&(t)->tdq_lock)) > >+static void sched_setpreempt(int); > static void sched_priority(struct thread *); > static void sched_thread_priority(struct thread *, u_char); > static int sched_interact_score(struct thread *); >@@ -321,18 +338,18 @@ static void tdq_load_rem(struct tdq *, struct thread *); > static __inline void tdq_runq_add(struct tdq *, struct thread *, int); > static __inline void tdq_runq_rem(struct tdq *, struct thread *); > static inline int sched_shouldpreempt(int, int, int); >-void tdq_print(int cpu); >+static void tdq_print(int cpu); > static void runq_print(struct runq *rq); >-static void tdq_add(struct tdq *, struct thread *, int); >+static int tdq_add(struct tdq *, struct thread *, int); > #ifdef SMP >-static struct thread *tdq_move(struct tdq *, struct tdq *); >+static int tdq_move(struct tdq *, struct tdq *); > static int tdq_idled(struct tdq *); >-static void tdq_notify(struct tdq *, struct thread *); >+static void tdq_notify(struct tdq *, int lowpri); > static struct thread *tdq_steal(struct tdq *, int); > static struct thread *runq_steal(struct runq *, int); > static int sched_pickcpu(struct thread *, int); > static void sched_balance(void); >-static int sched_balance_pair(struct tdq *, struct tdq *); >+static bool sched_balance_pair(struct tdq *, struct tdq *); > static inline struct tdq *sched_setcpu(struct thread *, int, int); > static inline void thread_unblock_switch(struct thread *, struct mtx *); > static int sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS); >@@ -396,7 +413,7 @@ runq_print(struct runq *rq) > /* > * Print the status of a per-cpu thread queue. Should be a ddb show cmd. > */ >-void >+static void __unused > tdq_print(int cpu) > { > struct tdq *tdq; >@@ -606,7 +623,7 @@ tdq_setlowpri(struct tdq *tdq, struct thread *ctd) > > TDQ_LOCK_ASSERT(tdq, MA_OWNED); > if (ctd == NULL) >- ctd = pcpu_find(TDQ_ID(tdq))->pc_curthread; >+ ctd = tdq->tdq_curthread; > td = tdq_choose(tdq); > if (td == NULL || td->td_priority > ctd->td_priority) > tdq->tdq_lowpri = ctd->td_priority; >@@ -697,7 +714,7 @@ cpu_search_lowest(const struct cpu_group *cg, const struct cpu_search *s, > if (!CPU_ISSET(c, &cg->cg_mask)) > continue; > tdq = TDQ_CPU(c); >- l = tdq->tdq_load; >+ l = TDQ_LOAD(tdq); > if (c == s->cs_prefer) { > if (__predict_false(s->cs_running)) > l--; >@@ -712,7 +729,8 @@ cpu_search_lowest(const struct cpu_group *cg, const struct cpu_search *s, > * If the threads is already on the CPU, don't look on the TDQ > * priority, since it can be the priority of the thread itself. 
> */ >- if (l > s->cs_load || (tdq->tdq_lowpri <= s->cs_pri && >+ if (l > s->cs_load || >+ (atomic_load_char(&tdq->tdq_lowpri) <= s->cs_pri && > (!s->cs_running || c != s->cs_prefer)) || > !CPU_ISSET(c, s->cs_mask)) > continue; >@@ -767,14 +785,14 @@ cpu_search_highest(const struct cpu_group *cg, const struct cpu_search *s, > if (!CPU_ISSET(c, &cg->cg_mask)) > continue; > tdq = TDQ_CPU(c); >- l = tdq->tdq_load; >+ l = TDQ_LOAD(tdq); > load = l * 256; > total += load; > > /* > * Check this CPU is acceptable. > */ >- if (l < s->cs_load || (tdq->tdq_transferable < s->cs_trans) || >+ if (l < s->cs_load || TDQ_TRANSFERABLE(tdq) < s->cs_trans || > !CPU_ISSET(c, s->cs_mask)) > continue; > >@@ -846,13 +864,13 @@ sched_balance_group(struct cpu_group *cg) > if (CPU_EMPTY(&lmask)) > break; > tdq = TDQ_CPU(high); >- if (tdq->tdq_load == 1) { >+ if (TDQ_LOAD(tdq) == 1) { > /* > * There is only one running thread. We can't move > * it from here, so tell it to pick new CPU by itself. > */ > TDQ_LOCK(tdq); >- td = pcpu_find(high)->pc_curthread; >+ td = tdq->tdq_curthread; > if ((td->td_flags & TDF_IDLETD) == 0 && > THREAD_CAN_MIGRATE(td)) { > td->td_flags |= TDF_NEEDRESCHED | TDF_PICKCPU; >@@ -864,9 +882,9 @@ sched_balance_group(struct cpu_group *cg) > } > anylow = 1; > nextlow: >- if (tdq->tdq_transferable == 0) >+ if (TDQ_TRANSFERABLE(tdq) == 0) > continue; >- low = sched_lowest(cg, &lmask, -1, tdq->tdq_load - 1, high, 1); >+ low = sched_lowest(cg, &lmask, -1, TDQ_LOAD(tdq) - 1, high, 1); > /* Stop if we looked well and found no less loaded CPU. */ > if (anylow && low == -1) > break; >@@ -929,37 +947,49 @@ tdq_unlock_pair(struct tdq *one, struct tdq *two) > } > > /* >- * Transfer load between two imbalanced thread queues. >+ * Transfer load between two imbalanced thread queues. Returns true if a thread >+ * was moved between the queues, and false otherwise. > */ >-static int >+static bool > sched_balance_pair(struct tdq *high, struct tdq *low) > { >- struct thread *td; >- int cpu; >+ int cpu, lowpri; >+ bool ret; > >+ ret = false; > tdq_lock_pair(high, low); >- td = NULL; >+ > /* > * Transfer a thread from high to low. > */ >- if (high->tdq_transferable != 0 && high->tdq_load > low->tdq_load && >- (td = tdq_move(high, low)) != NULL) { >- /* >- * In case the target isn't the current cpu notify it of the >- * new load, possibly sending an IPI to force it to reschedule. >- */ >- cpu = TDQ_ID(low); >- if (cpu != PCPU_GET(cpuid)) >- tdq_notify(low, td); >+ if (high->tdq_transferable != 0 && high->tdq_load > low->tdq_load) { >+ lowpri = tdq_move(high, low); >+ if (lowpri != -1) { >+ /* >+ * In case the target isn't the current CPU notify it of >+ * the new load, possibly sending an IPI to force it to >+ * reschedule. Otherwise maybe schedule a preemption. >+ */ >+ cpu = TDQ_ID(low); >+ if (cpu != PCPU_GET(cpuid)) >+ tdq_notify(low, lowpri); >+ else >+ sched_setpreempt(low->tdq_lowpri); >+ ret = true; >+ } > } > tdq_unlock_pair(high, low); >- return (td != NULL); >+ return (ret); > } > > /* >- * Move a thread from one thread queue to another. >+ * Move a thread from one thread queue to another. Returns -1 if the source >+ * queue was empty, else returns the maximum priority of all threads in >+ * the destination queue prior to the addition of the new thread. In the latter >+ * case, this priority can be used to determine whether an IPI needs to be >+ * delivered. 
> */ >-static struct thread * >+static int > tdq_move(struct tdq *from, struct tdq *to) > { > struct thread *td; >@@ -973,7 +1003,7 @@ tdq_move(struct tdq *from, struct tdq *to) > cpu = TDQ_ID(to); > td = tdq_steal(tdq, cpu); > if (td == NULL) >- return (NULL); >+ return (-1); > > /* > * Although the run queue is locked the thread may be >@@ -984,9 +1014,7 @@ tdq_move(struct tdq *from, struct tdq *to) > THREAD_LOCKPTR_ASSERT(td, TDQ_LOCKPTR(from)); > td->td_lock = TDQ_LOCKPTR(to); > td_get_sched(td)->ts_cpu = cpu; >- tdq_add(to, td, SRQ_YIELDING); >- >- return (td); >+ return (tdq_add(to, td, SRQ_YIELDING)); > } > > /* >@@ -1005,15 +1033,15 @@ tdq_idled(struct tdq *tdq) > return (1); > CPU_FILL(&mask); > CPU_CLR(PCPU_GET(cpuid), &mask); >- restart: >- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; >+restart: >+ switchcnt = TDQ_SWITCHCNT(tdq); > for (cg = tdq->tdq_cg, goup = 0; ; ) { > cpu = sched_highest(cg, &mask, steal_thresh, 1); > /* > * We were assigned a thread but not preempted. Returning > * 0 here will cause our caller to switch to it. > */ >- if (tdq->tdq_load) >+ if (TDQ_LOAD(tdq)) > return (0); > > /* >@@ -1049,8 +1077,8 @@ tdq_idled(struct tdq *tdq) > * this situation about 20% of the time on an 8 core > * 16 thread Ryzen 7, but it still helps performance. > */ >- if (steal->tdq_load < steal_thresh || >- steal->tdq_transferable == 0) >+ if (TDQ_LOAD(steal) < steal_thresh || >+ TDQ_TRANSFERABLE(steal) == 0) > goto restart; > /* > * Try to lock both queues. If we are assigned a thread while >@@ -1075,16 +1103,16 @@ tdq_idled(struct tdq *tdq) > * of date. The latter is rare. In either case restart > * the search. > */ >- if (steal->tdq_load < steal_thresh || >- steal->tdq_transferable == 0 || >- switchcnt != tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt) { >+ if (TDQ_LOAD(steal) < steal_thresh || >+ TDQ_TRANSFERABLE(steal) == 0 || >+ switchcnt != TDQ_SWITCHCNT(tdq)) { > tdq_unlock_pair(tdq, steal); > goto restart; > } > /* > * Steal the thread and switch to it. > */ >- if (tdq_move(steal, tdq) != NULL) >+ if (tdq_move(steal, tdq) != -1) > break; > /* > * We failed to acquire a thread even though it looked >@@ -1104,20 +1132,27 @@ tdq_idled(struct tdq *tdq) > > /* > * Notify a remote cpu of new work. Sends an IPI if criteria are met. >+ * >+ * "lowpri" is the minimum scheduling priority among all threads on >+ * the queue prior to the addition of the new thread. > */ > static void >-tdq_notify(struct tdq *tdq, struct thread *td) >+tdq_notify(struct tdq *tdq, int lowpri) > { >- struct thread *ctd; >- int pri; > int cpu; > >+ TDQ_LOCK_ASSERT(tdq, MA_OWNED); >+ KASSERT(tdq->tdq_lowpri <= lowpri, >+ ("tdq_notify: lowpri %d > tdq_lowpri %d", lowpri, tdq->tdq_lowpri)); >+ > if (tdq->tdq_owepreempt) > return; >- cpu = td_get_sched(td)->ts_cpu; >- pri = td->td_priority; >- ctd = pcpu_find(cpu)->pc_curthread; >- if (!sched_shouldpreempt(pri, ctd->td_priority, 1)) >+ >+ /* >+ * Check to see if the newly added thread should preempt the one >+ * currently running. >+ */ >+ if (!sched_shouldpreempt(tdq->tdq_lowpri, lowpri, 1)) > return; > > /* >@@ -1127,14 +1162,15 @@ tdq_notify(struct tdq *tdq, struct thread *td) > */ > atomic_thread_fence_seq_cst(); > >- if (TD_IS_IDLETHREAD(ctd)) { >- /* >- * If the MD code has an idle wakeup routine try that before >- * falling back to IPI. >- */ >- if (!tdq->tdq_cpu_idle || cpu_idle_wakeup(cpu)) >- return; >- } >+ /* >+ * Try to figure out if we can signal the idle thread instead of sending >+ * an IPI. 
This check is racy; at worst, we will deliever an IPI >+ * unnecessarily. >+ */ >+ cpu = TDQ_ID(tdq); >+ if (TD_IS_IDLETHREAD(tdq->tdq_curthread) && >+ (atomic_load_int(&tdq->tdq_cpu_idle) == 0 || cpu_idle_wakeup(cpu))) >+ return; > > /* > * The run queues have been updated, so any switch on the remote CPU >@@ -1326,13 +1362,15 @@ sched_pickcpu(struct thread *td, int flags) > * expired and it is idle, run it there. > */ > if (THREAD_CAN_SCHED(td, ts->ts_cpu) && >- tdq->tdq_lowpri >= PRI_MIN_IDLE && >+ atomic_load_int(&tdq->tdq_lowpri) >= PRI_MIN_IDLE && > SCHED_AFFINITY(ts, CG_SHARE_L2)) { > if (cg->cg_flags & CG_FLAG_THREAD) { > /* Check all SMT threads for being idle. */ > for (cpu = cg->cg_first; cpu <= cg->cg_last; cpu++) { >+ pri = >+ atomic_load_char(&TDQ_CPU(cpu)->tdq_lowpri); > if (CPU_ISSET(cpu, &cg->cg_mask) && >- TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE) >+ pri < PRI_MIN_IDLE) > break; > } > if (cpu > cg->cg_last) { >@@ -1403,8 +1441,8 @@ sched_pickcpu(struct thread *td, int flags) > */ > tdq = TDQ_CPU(cpu); > if (THREAD_CAN_SCHED(td, self) && TDQ_SELF()->tdq_lowpri > pri && >- tdq->tdq_lowpri < PRI_MIN_IDLE && >- TDQ_SELF()->tdq_load <= tdq->tdq_load + 1) { >+ atomic_load_char(&tdq->tdq_lowpri) < PRI_MIN_IDLE && >+ TDQ_LOAD(TDQ_SELF()) <= TDQ_LOAD(tdq) + 1) { > SCHED_STAT_INC(pickcpu_local); > cpu = self; > } >@@ -1507,6 +1545,7 @@ sched_setup(void *dummy) > TDQ_LOCK(tdq); > thread0.td_lock = TDQ_LOCKPTR(tdq); > tdq_load_add(tdq, &thread0); >+ tdq->tdq_curthread = &thread0; > tdq->tdq_lowpri = thread0.td_priority; > TDQ_UNLOCK(tdq); > } >@@ -2001,7 +2040,7 @@ tdq_trysteal(struct tdq *tdq) > * If a thread was added while interrupts were disabled don't > * steal one here. > */ >- if (tdq->tdq_load > 0) { >+ if (TDQ_LOAD(tdq) > 0) { > TDQ_LOCK(tdq); > break; > } >@@ -2043,8 +2082,8 @@ tdq_trysteal(struct tdq *tdq) > * At this point unconditionally exit the loop to bound > * the time spent in the critcal section. > */ >- if (steal->tdq_load < steal_thresh || >- steal->tdq_transferable == 0) >+ if (TDQ_LOAD(steal) < steal_thresh || >+ TDQ_TRANSFERABLE(steal) == 0) > continue; > /* > * Try to lock both queues. If we are assigned a thread while >@@ -2061,8 +2100,8 @@ tdq_trysteal(struct tdq *tdq) > * The data returned by sched_highest() is stale and > * the chosen CPU no longer has an eligible thread. > */ >- if (steal->tdq_load < steal_thresh || >- steal->tdq_transferable == 0) { >+ if (TDQ_LOAD(steal) < steal_thresh || >+ TDQ_TRANSFERABLE(steal) == 0) { > TDQ_UNLOCK(steal); > break; > } >@@ -2071,7 +2110,7 @@ tdq_trysteal(struct tdq *tdq) > * bail out and let the idle thread to a more complete search > * outside of a critical section. 
> */ >- if (tdq_move(steal, tdq) == NULL) { >+ if (tdq_move(steal, tdq) == -1) { > TDQ_UNLOCK(steal); > break; > } >@@ -2090,6 +2129,7 @@ static struct mtx * > sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags) > { > struct tdq *tdn; >+ int lowpri; > > KASSERT(THREAD_CAN_MIGRATE(td) || > (td_get_sched(td)->ts_flags & TSF_BOUND) != 0, >@@ -2107,8 +2147,8 @@ sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags) > */ > TDQ_UNLOCK(tdq); > TDQ_LOCK(tdn); >- tdq_add(tdn, td, flags); >- tdq_notify(tdn, td); >+ lowpri = tdq_add(tdn, td, flags); >+ tdq_notify(tdn, lowpri); > TDQ_UNLOCK(tdn); > TDQ_LOCK(tdq); > #endif >@@ -2162,9 +2202,9 @@ sched_switch(struct thread *td, int flags) > (flags & SW_PREEMPT) != 0; > td->td_flags &= ~(TDF_NEEDRESCHED | TDF_PICKCPU | TDF_SLICEEND); > td->td_owepreempt = 0; >- tdq->tdq_owepreempt = 0; >+ atomic_store_char(&tdq->tdq_owepreempt, 0); > if (!TD_IS_IDLETHREAD(td)) >- tdq->tdq_switchcnt++; >+ TDQ_SWITCHCNT_INC(tdq); > > /* > * Always block the thread lock so we can drop the tdq lock early. >@@ -2217,6 +2257,7 @@ sched_switch(struct thread *td, int flags) > * thread-queue locked. > */ > TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED); >+ MPASS(td == tdq->tdq_curthread); > newtd = choosethread(); > sched_pctcpu_update(td_get_sched(newtd), 0); > TDQ_UNLOCK(tdq); >@@ -2523,6 +2564,7 @@ sched_clock(struct thread *td, int cnt) > */ > tdq->tdq_oldswitchcnt = tdq->tdq_switchcnt; > tdq->tdq_switchcnt = tdq->tdq_load; >+ > /* > * Advance the insert index once for each tick to ensure that all > * threads get a chance to run. >@@ -2579,10 +2621,10 @@ sched_runnable(void) > > tdq = TDQ_SELF(); > if ((curthread->td_flags & TDF_IDLETD) != 0) { >- if (tdq->tdq_load > 0) >+ if (TDQ_LOAD(tdq) > 0) > goto out; > } else >- if (tdq->tdq_load - 1 > 0) >+ if (TDQ_LOAD(tdq) - 1 > 0) > goto out; > load = 0; > out: >@@ -2603,30 +2645,31 @@ sched_choose(void) > tdq = TDQ_SELF(); > TDQ_LOCK_ASSERT(tdq, MA_OWNED); > td = tdq_choose(tdq); >- if (td) { >+ if (td != NULL) { > tdq_runq_rem(tdq, td); > tdq->tdq_lowpri = td->td_priority; >- return (td); >+ } else { >+ tdq->tdq_lowpri = PRI_MAX_IDLE; >+ td = PCPU_GET(idlethread); > } >- tdq->tdq_lowpri = PRI_MAX_IDLE; >- return (PCPU_GET(idlethread)); >+ tdq->tdq_curthread = td; >+ return (td); > } > > /* >- * Set owepreempt if necessary. Preemption never happens directly in ULE, >- * we always request it once we exit a critical section. >+ * Set owepreempt if the currently running thread has lower priority than "pri". >+ * Preemption never happens directly in ULE, we always request it once we exit a >+ * critical section. > */ >-static inline void >-sched_setpreempt(struct thread *td) >+static void >+sched_setpreempt(int pri) > { > struct thread *ctd; > int cpri; >- int pri; >- >- THREAD_LOCK_ASSERT(curthread, MA_OWNED); > > ctd = curthread; >- pri = td->td_priority; >+ THREAD_LOCK_ASSERT(ctd, MA_OWNED); >+ > cpri = ctd->td_priority; > if (pri < cpri) > ctd->td_flags |= TDF_NEEDRESCHED; >@@ -2642,9 +2685,10 @@ sched_setpreempt(struct thread *td) > * thread to it. This is the internal function called when the tdq is > * predetermined. 
> */ >-void >+static int > tdq_add(struct tdq *tdq, struct thread *td, int flags) > { >+ int lowpri; > > TDQ_LOCK_ASSERT(tdq, MA_OWNED); > THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED); >@@ -2655,10 +2699,12 @@ tdq_add(struct tdq *tdq, struct thread *td, int flags) > KASSERT(td->td_flags & TDF_INMEM, > ("sched_add: thread swapped out")); > >- if (td->td_priority < tdq->tdq_lowpri) >+ lowpri = tdq->tdq_lowpri; >+ if (td->td_priority < lowpri) > tdq->tdq_lowpri = td->td_priority; > tdq_runq_add(tdq, td, flags); > tdq_load_add(tdq, td); >+ return (lowpri); > } > > /* >@@ -2672,7 +2718,7 @@ sched_add(struct thread *td, int flags) > { > struct tdq *tdq; > #ifdef SMP >- int cpu; >+ int cpu, lowpri; > #endif > > KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq add", >@@ -2696,11 +2742,11 @@ sched_add(struct thread *td, int flags) > */ > cpu = sched_pickcpu(td, flags); > tdq = sched_setcpu(td, cpu, flags); >- tdq_add(tdq, td, flags); >+ lowpri = tdq_add(tdq, td, flags); > if (cpu != PCPU_GET(cpuid)) >- tdq_notify(tdq, td); >+ tdq_notify(tdq, lowpri); > else if (!(flags & SRQ_YIELDING)) >- sched_setpreempt(td); >+ sched_setpreempt(td->td_priority); > #else > tdq = TDQ_SELF(); > /* >@@ -2714,9 +2760,9 @@ sched_add(struct thread *td, int flags) > else > thread_lock_set(td, TDQ_LOCKPTR(tdq)); > } >- tdq_add(tdq, td, flags); >+ (void)tdq_add(tdq, td, flags); > if (!(flags & SRQ_YIELDING)) >- sched_setpreempt(td); >+ sched_setpreempt(td->td_priority); > #endif > if (!(flags & SRQ_HOLDTD)) > thread_unlock(td); >@@ -2874,10 +2920,10 @@ sched_load(void) > > total = 0; > CPU_FOREACH(i) >- total += TDQ_CPU(i)->tdq_sysload; >+ total += atomic_load_int(&TDQ_CPU(i)->tdq_sysload); > return (total); > #else >- return (TDQ_SELF()->tdq_sysload); >+ return (atomic_load_int(&TDQ_SELF()->tdq_sysload)); > #endif > } > >@@ -2917,18 +2963,18 @@ sched_idletd(void *dummy) > THREAD_NO_SLEEPING(); > oldswitchcnt = -1; > for (;;) { >- if (tdq->tdq_load) { >+ if (TDQ_LOAD(tdq)) { > thread_lock(td); > mi_switch(SW_VOL | SWT_IDLE); > } >- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; >+ switchcnt = TDQ_SWITCHCNT(tdq); > #ifdef SMP > if (always_steal || switchcnt != oldswitchcnt) { > oldswitchcnt = switchcnt; > if (tdq_idled(tdq) == 0) > continue; > } >- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; >+ switchcnt = TDQ_SWITCHCNT(tdq); > #else > oldswitchcnt = switchcnt; > #endif >@@ -2941,23 +2987,23 @@ sched_idletd(void *dummy) > */ > if (TDQ_IDLESPIN(tdq) && switchcnt > sched_idlespinthresh) { > for (i = 0; i < sched_idlespins; i++) { >- if (tdq->tdq_load) >+ if (TDQ_LOAD(tdq)) > break; > cpu_spinwait(); > } > } > > /* If there was context switch during spin, restart it. */ >- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; >- if (tdq->tdq_load != 0 || switchcnt != oldswitchcnt) >+ switchcnt = TDQ_SWITCHCNT(tdq); >+ if (TDQ_LOAD(tdq) != 0 || switchcnt != oldswitchcnt) > continue; > > /* Run main MD idle handler. */ >- tdq->tdq_cpu_idle = 1; >+ atomic_store_int(&tdq->tdq_cpu_idle, 1); > /* >- * Make sure that tdq_cpu_idle update is globally visible >- * before cpu_idle() read tdq_load. The order is important >- * to avoid race with tdq_notify. >+ * Make sure that the tdq_cpu_idle update is globally visible >+ * before cpu_idle() reads tdq_load. The order is important >+ * to avoid races with tdq_notify(). > */ > atomic_thread_fence_seq_cst(); > /* >@@ -2965,21 +3011,21 @@ sched_idletd(void *dummy) > * threads often enough to make it worthwhile to do so in > * order to avoid calling cpu_idle(). 
> */ >- if (tdq->tdq_load != 0) { >- tdq->tdq_cpu_idle = 0; >+ if (TDQ_LOAD(tdq) != 0) { >+ atomic_store_int(&tdq->tdq_cpu_idle, 0); > continue; > } > cpu_idle(switchcnt * 4 > sched_idlespinthresh); >- tdq->tdq_cpu_idle = 0; >+ atomic_store_int(&tdq->tdq_cpu_idle, 0); > > /* > * Account thread-less hardware interrupts and > * other wakeup reasons equal to context switches. > */ >- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; >+ switchcnt = TDQ_SWITCHCNT(tdq); > if (switchcnt != oldswitchcnt) > continue; >- tdq->tdq_switchcnt++; >+ TDQ_SWITCHCNT_INC(tdq); > oldswitchcnt++; > } > } >diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c >index 53b32672132a..d7647b2e25ef 100644 >--- a/sys/x86/x86/cpu_machdep.c >+++ b/sys/x86/x86/cpu_machdep.c >@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); > #include "opt_maxmem.h" > #include "opt_mp_watchdog.h" > #include "opt_platform.h" >+#include "opt_sched.h" > #ifdef __i386__ > #include "opt_apic.h" > #endif >@@ -528,32 +529,24 @@ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ > SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait, > 0, "Use MONITOR/MWAIT for short idle"); > >-static void >-cpu_idle_acpi(sbintime_t sbt) >+static bool >+cpu_idle_enter(int *statep, int newstate) > { >- int *state; >+ KASSERT(atomic_load_int(statep) == STATE_RUNNING, >+ ("%s: state %d", __func__, atomic_load_int(statep))); > >- state = &PCPU_PTR(monitorbuf)->idle_state; >- atomic_store_int(state, STATE_SLEEPING); >- >- /* See comments in cpu_idle_hlt(). */ >- disable_intr(); >- if (sched_runnable()) >- enable_intr(); >- else if (cpu_idle_hook) >- cpu_idle_hook(sbt); >- else >- acpi_cpu_c1(); >- atomic_store_int(state, STATE_RUNNING); >-} >- >-static void >-cpu_idle_hlt(sbintime_t sbt) >-{ >- int *state; >- >- state = &PCPU_PTR(monitorbuf)->idle_state; >- atomic_store_int(state, STATE_SLEEPING); >+ /* >+ * A fence is needed to prevent reordering of the load in >+ * sched_runnable() with this store to the idle state word. Without it, >+ * cpu_idle_wakeup() can observe the state as STATE_RUNNING after having >+ * added load to the queue, and elide an IPI. Then, sched_runnable() >+ * can observe tdq_load == 0, so the CPU ends up idling with pending >+ * work. >+ */ >+ atomic_store_int(statep, newstate); >+#if defined(SCHED_ULE) && defined(SMP) >+ atomic_thread_fence_seq_cst(); >+#endif > > /* > * Since we may be in a critical section from cpu_idle(), if >@@ -572,35 +565,62 @@ cpu_idle_hlt(sbintime_t sbt) > * interrupt. > */ > disable_intr(); >- if (sched_runnable()) >+ if (sched_runnable()) { > enable_intr(); >- else >- acpi_cpu_c1(); >- atomic_store_int(state, STATE_RUNNING); >+ atomic_store_int(statep, STATE_RUNNING); >+ return (false); >+ } else { >+ return (true); >+ } > } > > static void >-cpu_idle_mwait(sbintime_t sbt) >+cpu_idle_exit(int *statep) >+{ >+ atomic_store_int(statep, STATE_RUNNING); >+} >+ >+static void >+cpu_idle_acpi(sbintime_t sbt) > { > int *state; > > state = &PCPU_PTR(monitorbuf)->idle_state; >- atomic_store_int(state, STATE_MWAIT); >+ if (cpu_idle_enter(state, STATE_SLEEPING)) { >+ if (cpu_idle_hook) >+ cpu_idle_hook(sbt); >+ else >+ acpi_cpu_c1(); >+ cpu_idle_exit(state); >+ } >+} > >- /* See comments in cpu_idle_hlt(). 
*/ >- disable_intr(); >- if (sched_runnable()) { >+static void >+cpu_idle_hlt(sbintime_t sbt) >+{ >+ int *state; >+ >+ state = &PCPU_PTR(monitorbuf)->idle_state; >+ if (cpu_idle_enter(state, STATE_SLEEPING)) { >+ acpi_cpu_c1(); > atomic_store_int(state, STATE_RUNNING); >- enable_intr(); >- return; > } >+} > >- cpu_monitor(state, 0, 0); >- if (atomic_load_int(state) == STATE_MWAIT) >- __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); >- else >- enable_intr(); >- atomic_store_int(state, STATE_RUNNING); >+static void >+cpu_idle_mwait(sbintime_t sbt) >+{ >+ int *state; >+ >+ state = &PCPU_PTR(monitorbuf)->idle_state; >+ if (cpu_idle_enter(state, STATE_MWAIT)) { >+ cpu_monitor(state, 0, 0); >+ if (atomic_load_int(state) == STATE_MWAIT) >+ __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); >+ else >+ enable_intr(); >+ cpu_idle_exit(state); >+ } > } > > static void
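
For reference, the core ordering requirement the patch documents in cpu_idle_enter() and sched_idletd() is a store/fence/load handshake between an idling CPU and tdq_notify(): the idle side publishes its idle state, issues a full fence, then re-checks the run queue, while the waking side publishes the new load, issues a full fence, then reads the idle state. Below is a minimal user-space sketch of that pattern using C11 atomics; the names (idle_enter, notify_needs_wakeup, cpu_idle_state, queue_load) are illustrative stand-ins rather than kernel identifiers, and the locking that the kernel performs around the load update is omitted.

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic int cpu_idle_state;	/* stand-in for tdq_cpu_idle / idle_state */
static _Atomic int queue_load;		/* stand-in for tdq_load */

/* Idle side: announce idling, then re-check for work before sleeping. */
static bool
idle_enter(void)
{
	atomic_store_explicit(&cpu_idle_state, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* order the store before the load */
	if (atomic_load_explicit(&queue_load, memory_order_relaxed) != 0) {
		/* Work appeared concurrently; do not sleep. */
		atomic_store_explicit(&cpu_idle_state, 0, memory_order_relaxed);
		return (false);
	}
	/* Safe to sleep: since we saw no load, the waker's check will see state == 1. */
	return (true);
}

/* Waking side: publish new work, then decide whether a wakeup/IPI is needed. */
static bool
notify_needs_wakeup(void)
{
	atomic_fetch_add_explicit(&queue_load, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* order the store before the load */
	return (atomic_load_explicit(&cpu_idle_state, memory_order_relaxed) != 0);
}

With both fences in place at least one side observes the other's store, so the combination "CPU sleeps with pending work and no IPI is sent" cannot occur; without them, both loads may effectively complete before the corresponding stores, which is the race the patch closes.
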
Attachments on bug 264867: 234909 | 235005 | 235023 | 235195