FreeBSD Bugzilla – Attachment 174231 Details for
Bug 187594
[zfs] [patch] ZFS ARC behavior problem and fix
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Second cut against 11.0-STABLE
patch-11.0-STABLE-r305005-rev2 (text/plain), 6.20 KB, created by
karl
on 2016-08-30 20:23:02 UTC
(
hide
)
Description:
Second cut against 11.0-STABLE
Filename:
MIME Type:
Creator:
karl
Created:
2016-08-30 20:23:02 UTC
Size:
6.20 KB
patch
obsolete
>Index: cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c >=================================================================== >--- cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 304971) >+++ cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (working copy) >@@ -237,7 +237,17 @@ int zfs_arc_p_min_shift = 0; > int zfs_disable_dup_eviction = 0; > uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ > u_int zfs_arc_free_target = 0; >+u_int zfs_arc_wakeup_pager = 0; >+u_int zfs_arc_wakeup_delay = 500; >+int zfs_arc_last_slab = 0; > >+#define WAKE_PAGER >+#ifdef WAKE_PAGER >+#define WAKE_PAGER_CONSTANT 10 / 9 /* Pager wakeup threshold */ >+static int arc_init_done = 0; /* After arc_warm is valid */ >+extern void pagedaemon_wakeup(void); >+#endif >+ > /* Absolute min for arc min / max is 16MB. */ > static uint64_t arc_abs_min = 16 << 20; > >@@ -251,7 +261,8 @@ static void > arc_free_target_init(void *unused __unused) > { > >- zfs_arc_free_target = vm_pageout_wakeup_thresh; >+ zfs_arc_free_target = vm_pageout_wakeup_thresh + (vm_pageout_wakeup_thresh / 20); >+ zfs_arc_wakeup_pager = vm_pageout_wakeup_thresh * WAKE_PAGER_CONSTANT; > } > SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, > arc_free_target_init, NULL); >@@ -3476,6 +3487,11 @@ int64_t arc_pages_pp_reserve = 64; > int64_t arc_swapfs_reserve = 64; > > /* >+ * Declare file-local static for event processor bypass >+ */ >+static unsigned int arc_no_wake_event = 0; >+ >+/* > * Return the amount of memory that can be consumed before reclaim will be > * needed. Positive if there is sufficient free memory, negative indicates > * the amount of memory that needs to be freed up. 
>@@ -3488,6 +3504,12 @@ arc_available_memory(void) > free_memory_reason_t r = FMR_UNKNOWN; > > #ifdef _KERNEL >+#ifdef WAKE_PAGER >+ sbintime_t now; >+ static sbintime_t last_pagedaemon_wake = 0; >+ void call_arc_kmem_reap(int); >+#endif /* WAKE_PAGER */ >+ > if (needfree > 0) { > n = PAGESIZE * (-needfree); > if (n < lowest) { >@@ -3495,6 +3517,37 @@ arc_available_memory(void) > r = FMR_NEEDFREE; > } > } >+#ifdef WAKE_PAGER >+/* >+ * When arc is initialized, perform the following: >+ * >+ * 1. If we are in the "memory is low enough to wake the pager" zone, >+ * reap the kernel UMA caches once per wakeup_delay period (500ms default) >+ * AND wake the pager up (so it can demote pages from inactive to cache to >+ * ultimately the free list.) >+ * >+ * 2. If we're below VM's free_target in free RAM reap *one* UMA zone per >+ * time period (500ms). >+ * >+ */ >+ if (arc_init_done) { >+ now = getsbinuptime(); >+ if ((now - last_pagedaemon_wake) / SBT_1MS > zfs_arc_wakeup_delay) { >+ last_pagedaemon_wake = now; >+ arc_no_wake_event++; /* Set bypass flag for ARC */ >+ if ( ( ((int64_t) freemem - zfs_arc_wakeup_pager) < 0) && (arc_warm == B_TRUE) ) { >+ call_arc_kmem_reap(0); /* Reap caches if we're close */ >+ DTRACE_PROBE(arc__wake_pagedaemon); >+ (void) pagedaemon_wakeup(); /* Wake the pager */ >+ } else { >+ if ( ((int64_t) freemem - vm_cnt.v_free_target) < 0) { >+ call_arc_kmem_reap(1); /* Reap one cache if lots of memory */ >+ DTRACE_PROBE2(arc__reap_one, int, zfs_arc_last_slab, int, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); >+ } >+ } >+ } >+ } >+#endif /* WAKE_PAGER */ > > /* > * Cooperate with pagedaemon when it's time for it to scan >@@ -3633,12 +3686,22 @@ extern kmem_cache_t *zio_buf_cache[]; > extern kmem_cache_t *zio_data_buf_cache[]; > extern kmem_cache_t *range_seg_cache; > >+/* >+ * Pass a flag to this routine; if zero, then reap all. If not then reap >+ * one slab on a rotating basis. 
This allows a low-rate call to be used >+ * on a routine, maintenance basis even when not low on RAM so we don't have >+ * huge amounts of RAM out in unused UMA allocations. >+ */ > static __noinline void >-arc_kmem_reap_now(void) >+arc_kmem_reap_now(flag) >+int flag; > { > size_t i; > kmem_cache_t *prev_cache = NULL; > kmem_cache_t *prev_data_cache = NULL; >+ int arc_cache_reaped = 0; >+ int arc_data_cache_reaped = 0; >+ int reset_last_slab = 0; > > DTRACE_PROBE(arc__kmem_reap_start); > #ifdef _KERNEL >@@ -3660,13 +3723,28 @@ static __noinline void > for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) { > if (zio_buf_cache[i] != prev_cache) { > prev_cache = zio_buf_cache[i]; >- kmem_cache_reap_now(zio_buf_cache[i]); >+ if ((!flag) || ((i > zfs_arc_last_slab) && (!arc_cache_reaped))) { >+ kmem_cache_reap_now(zio_buf_cache[i]); >+ arc_cache_reaped++; >+ } > } > if (zio_data_buf_cache[i] != prev_data_cache) { > prev_data_cache = zio_data_buf_cache[i]; >- kmem_cache_reap_now(zio_data_buf_cache[i]); >+ if ((!flag) || ((i > zfs_arc_last_slab) && (!arc_data_cache_reaped))) { >+ kmem_cache_reap_now(zio_data_buf_cache[i]); >+ arc_data_cache_reaped++; >+ } > } >+ if (flag && (!reset_last_slab) && (arc_cache_reaped || arc_data_cache_reaped)) { >+ reset_last_slab = i; >+ } > } >+ if (reset_last_slab) { >+ zfs_arc_last_slab = reset_last_slab; >+ } >+ if ((!arc_cache_reaped) && (!arc_data_cache_reaped) && (flag)) { /* Found nothing to reap on one-pass */ >+ zfs_arc_last_slab = 0; /* Reset */ >+ } > kmem_cache_reap_now(buf_cache); > kmem_cache_reap_now(hdr_full_cache); > kmem_cache_reap_now(hdr_l2only_cache); >@@ -3684,6 +3762,12 @@ static __noinline void > DTRACE_PROBE(arc__kmem_reap_end); > } > >+void call_arc_kmem_reap(flag) >+int flag; >+{ >+ arc_kmem_reap_now(flag); >+} >+ > /* > * Threads can block in arc_get_data_buf() waiting for this thread to evict > * enough data and signal them to proceed. 
When this happens, the threads in >@@ -3726,7 +3810,7 @@ arc_reclaim_thread(void *dummy __unused) > */ > growtime = gethrtime() + SEC2NSEC(arc_grow_retry); > >- arc_kmem_reap_now(); >+ arc_kmem_reap_now(0); > > /* > * If we are still low on memory, shrink the ARC >@@ -5431,6 +5515,10 @@ static eventhandler_tag arc_event_lowmem = NULL; > static void > arc_lowmem(void *arg __unused, int howto __unused) > { >+ if (arc_no_wake_event) { /* Don't do it if we woke the pager */ >+ arc_no_wake_event = 0; /* Just clear the flag */ >+ return; >+ } > > mutex_enter(&arc_reclaim_lock); > /* XXX: Memory deficit should be passed as argument. */ >@@ -5696,6 +5784,9 @@ arc_init(void) > printf(" in /boot/loader.conf.\n"); > } > #endif >+#ifdef WAKE_PAGER >+ arc_init_done++; >+#endif /* WAKE_PAGER */ > } > > void
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 187594
:
140882
|
140883
|
140884
|
140885
|
140886
|
140887
|
140888
|
140889
|
140890
|
140891
|
140892
|
146178
|
146203
|
146249
|
146251
|
146287
|
146300
|
146373
|
146423
|
146424
|
146456
|
146816
|
146817
|
146851
|
146852
|
146854
|
146859
|
146861
|
146946
|
146947
|
146948
|
146949
|
147014
|
147068
|
147069
|
147070
|
147265
|
147274
|
147275
|
147276
|
147286
|
147459
|
147607
|
147609
|
147733
|
147738
|
147754
|
147815
|
152852
|
158809
|
159207
|
159688
|
159859
|
159905
|
161691
|
161692
|
161943
|
164051
|
174197
|
174198
|
174231
|
174232
|
174254
|
186818