FreeBSD Bugzilla – Attachment 147070 Details for
Bug 187594
[zfs] [patch] ZFS ARC behavior problem and fix
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
ARC reclaim refactor (against head)
arc-reclaim-head.patch (text/plain), 13.16 KB, created by
Steven Hartland
on 2014-09-08 15:29:46 UTC
(
hide
)
Description:
ARC reclaim refactor (against head)
Filename: arc-reclaim-head.patch
MIME Type: text/plain
Creator:
Steven Hartland
Created:
2014-09-08 15:29:46 UTC
Size:
13.16 KB
patch
obsolete
>Index: sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c >=================================================================== >--- sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c (revision 270875) >+++ sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c (working copy) >@@ -126,42 +126,6 @@ kmem_size_init(void *unused __unused) > } > SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL); > >-/* >- * The return values from kmem_free_* are only valid once the pagedaemon >- * has been initialised, before then they return 0. >- * >- * To ensure the returns are valid the caller can use a SYSINIT with >- * subsystem set to SI_SUB_KTHREAD_PAGE and an order of at least >- * SI_ORDER_SECOND. >- */ >-u_int >-kmem_free_target(void) >-{ >- >- return (vm_cnt.v_free_target); >-} >- >-u_int >-kmem_free_min(void) >-{ >- >- return (vm_cnt.v_free_min); >-} >- >-u_int >-kmem_free_count(void) >-{ >- >- return (vm_cnt.v_free_count + vm_cnt.v_cache_count); >-} >- >-u_int >-kmem_page_count(void) >-{ >- >- return (vm_cnt.v_page_count); >-} >- > uint64_t > kmem_size(void) > { >@@ -169,13 +133,6 @@ kmem_size(void) > return (kmem_size_val); > } > >-uint64_t >-kmem_used(void) >-{ >- >- return (vmem_size(kmem_arena, VMEM_ALLOC)); >-} >- > static int > kmem_std_constructor(void *mem, int size __unused, void *private, int flags) > { >Index: sys/cddl/compat/opensolaris/sys/kmem.h >=================================================================== >--- sys/cddl/compat/opensolaris/sys/kmem.h (revision 270875) >+++ sys/cddl/compat/opensolaris/sys/kmem.h (working copy) >@@ -66,17 +66,6 @@ typedef struct kmem_cache { > void *zfs_kmem_alloc(size_t size, int kmflags); > void zfs_kmem_free(void *buf, size_t size); > uint64_t kmem_size(void); >-uint64_t kmem_used(void); >-u_int kmem_page_count(void); >- >-/* >- * The return values from kmem_free_* are only valid once the pagedaemon >- * has been initialised, before then they return 0. 
>- */ >-u_int kmem_free_count(void); >-u_int kmem_free_target(void); >-u_int kmem_free_min(void); >- > kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align, > int (*constructor)(void *, void *, int), void (*destructor)(void *, void *), > void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags); >@@ -88,6 +77,9 @@ void kmem_reap(void); > int kmem_debugging(void); > void *calloc(size_t n, size_t s); > >+#define freemem (vm_cnt.v_free_count + vm_cnt.v_cache_count) >+#define minfree vm_cnt.v_free_min >+#define heap_arena kmem_arena > #define kmem_alloc(size, kmflags) zfs_kmem_alloc((size), (kmflags)) > #define kmem_zalloc(size, kmflags) zfs_kmem_alloc((size), (kmflags) | M_ZERO) > #define kmem_free(buf, size) zfs_kmem_free((buf), (size)) >Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c >=================================================================== >--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 270875) >+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (working copy) >@@ -138,6 +138,7 @@ > #include <sys/sdt.h> > > #include <vm/vm_pageout.h> >+#include <machine/vmparam.h> > > #ifdef illumos > #ifndef _KERNEL >@@ -201,7 +202,7 @@ int zfs_arc_shrink_shift = 0; > int zfs_arc_p_min_shift = 0; > int zfs_disable_dup_eviction = 0; > uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ >-u_int zfs_arc_free_target = (1 << 19); /* default before pagedaemon init only */ >+u_int zfs_arc_free_target = (1 << 16); /* default before pagedaemon init only */ > > static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); > >@@ -210,11 +211,10 @@ static void > arc_free_target_init(void *unused __unused) > { > >- zfs_arc_free_target = kmem_free_target(); >+ zfs_arc_free_target = (vm_pageout_wakeup_thresh / 2) * 3; > } > SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, > arc_free_target_init, NULL); >-#endif > > TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); > 
SYSCTL_DECL(_vfs_zfs); >@@ -245,9 +245,9 @@ sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS > if (err != 0 || req->newptr == NULL) > return (err); > >- if (val < kmem_free_min()) >+ if (val < minfree) > return (EINVAL); >- if (val > kmem_page_count()) >+ if (val > vm_cnt.v_page_count) > return (EINVAL); > > zfs_arc_free_target = val; >@@ -254,6 +254,7 @@ sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS > > return (0); > } >+#endif > > /* > * Note that buffers can be in one of 6 states: >@@ -2462,8 +2463,8 @@ arc_shrink(void) > if (arc_c > arc_c_min) { > uint64_t to_free; > >- DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t, >- arc_c_min); >+ DTRACE_PROBE4(arc__shrink, uint64_t, arc_c, uint64_t, >+ arc_c_min, uint64_t, arc_p, uint64_t, to_free); > #ifdef _KERNEL > to_free = arc_c >> arc_shrink_shift; > #else >@@ -2479,6 +2480,10 @@ arc_shrink(void) > arc_c = MAX(arc_size, arc_c_min); > if (arc_p > arc_c) > arc_p = (arc_c >> 1); >+ >+ DTRACE_PROBE2(arc__shrunk, uint64_t, arc_c, uint64_t, >+ arc_p); >+ > ASSERT(arc_c >= arc_c_min); > ASSERT((int64_t)arc_p >= 0); > } >@@ -2503,18 +2508,13 @@ arc_reclaim_needed(void) > return (1); > } > >- if (kmem_free_count() < zfs_arc_free_target) { >- DTRACE_PROBE2(arc__reclaim_freetarget, uint64_t, >- kmem_free_count(), uint64_t, zfs_arc_free_target); >- return (1); >- } >- > /* > * Cooperate with pagedaemon when it's time for it to scan > * and reclaim some pages. > */ >- if (vm_paging_needed()) { >- DTRACE_PROBE(arc__reclaim_paging); >+ if (freemem < zfs_arc_free_target) { >+ DTRACE_PROBE2(arc__reclaim_freemem, uint64_t, >+ freemem, uint64_t, zfs_arc_free_target); > return (1); > } > >@@ -2544,8 +2544,19 @@ arc_reclaim_needed(void) > if (availrmem < swapfs_minfree + swapfs_reserve + extra) > return (1); > >-#if defined(__i386) > /* >+ * Check that we have enough availrmem that memory locking (e.g., via >+ * mlock(3C) or memcntl(2)) can still succeed. 
(pages_pp_maximum >+ * stores the number of pages that cannot be locked; when availrmem >+ * drops below pages_pp_maximum, page locking mechanisms such as >+ * page_pp_lock() will fail.) >+ */ >+ if (availrmem <= pages_pp_maximum) >+ return (1); >+ >+#endif /* sun */ >+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) >+ /* > * If we're on an i386 platform, it's possible that we'll exhaust the > * kernel heap space before we ever run out of available physical > * memory. Most checks of the size of the heap_area compare against >@@ -2556,25 +2567,33 @@ arc_reclaim_needed(void) > * heap is allocated. (Or, in the calculation, if less than 1/4th is > * free) > */ >- if (btop(vmem_size(heap_arena, VMEM_FREE)) < >- (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2)) >- return (1); >-#endif >-#else /* sun */ >-#ifdef __i386__ >- /* i386 has KVA limits that the raw page counts above don't consider */ >- if (kmem_used() > (kmem_size() * 3) / 4) { >+ if (vmem_size(heap_arena, VMEM_FREE) < >+ (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) { > DTRACE_PROBE2(arc__reclaim_used, uint64_t, >- kmem_used(), uint64_t, (kmem_size() * 3) / 4); >+ vmem_size(heap_arena, VMEM_FREE), uint64_t, >+ (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2); > return (1); > } > #endif >+#ifdef sun >+ /* >+ * If zio data pages are being allocated out of a separate heap segment, >+ * then enforce that the size of available vmem for this arena remains >+ * above about 1/16th free. >+ * >+ * Note: The 1/16th arena free requirement was put in place >+ * to aggressively evict memory from the arc in order to avoid >+ * memory fragmentation issues. 
>+ */ >+ if (zio_arena != NULL && >+ vmem_size(zio_arena, VMEM_FREE) < >+ (vmem_size(zio_arena, VMEM_ALLOC) >> 4)) >+ return (1); > #endif /* sun */ >- >-#else >+#else /* _KERNEL */ > if (spa_get_random(100) == 0) > return (1); >-#endif >+#endif /* _KERNEL */ > DTRACE_PROBE(arc__reclaim_no); > > return (0); >@@ -2583,7 +2602,7 @@ arc_reclaim_needed(void) > extern kmem_cache_t *zio_buf_cache[]; > extern kmem_cache_t *zio_data_buf_cache[]; > >-static void >+static void __used > arc_kmem_reap_now(arc_reclaim_strategy_t strat) > { > size_t i; >@@ -2590,6 +2609,7 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat) > kmem_cache_t *prev_cache = NULL; > kmem_cache_t *prev_data_cache = NULL; > >+ DTRACE_PROBE(arc__kmem_reap_start); > #ifdef _KERNEL > if (arc_meta_used >= arc_meta_limit) { > /* >@@ -2625,6 +2645,16 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat) > } > kmem_cache_reap_now(buf_cache); > kmem_cache_reap_now(hdr_cache); >+ >+#ifdef sun >+ /* >+ * Ask the vmem arena to reclaim unused memory from its >+ * quantum caches. 
>+ */ >+ if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR) >+ vmem_qcache_reap(zio_arena); >+#endif >+ DTRACE_PROBE(arc__kmem_reap_end); > } > > static void >@@ -2642,6 +2672,7 @@ arc_reclaim_thread(void *dummy __unused) > > if (arc_no_grow) { > if (last_reclaim == ARC_RECLAIM_CONS) { >+ DTRACE_PROBE(arc__reclaim_aggr_no_grow); > last_reclaim = ARC_RECLAIM_AGGR; > } else { > last_reclaim = ARC_RECLAIM_CONS; >@@ -2649,6 +2680,7 @@ arc_reclaim_thread(void *dummy __unused) > } else { > arc_no_grow = TRUE; > last_reclaim = ARC_RECLAIM_AGGR; >+ DTRACE_PROBE(arc__reclaim_aggr); > membar_producer(); > } > >@@ -2753,6 +2785,7 @@ arc_adapt(int bytes, arc_state_t *state) > * cache size, increment the target cache size > */ > if (arc_size > arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) { >+ DTRACE_PROBE1(arc__inc_adapt, int, bytes); > atomic_add_64(&arc_c, (int64_t)bytes); > if (arc_c > arc_c_max) > arc_c = arc_c_max; >@@ -2774,20 +2807,6 @@ arc_evict_needed(arc_buf_contents_t type) > if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit) > return (1); > >-#ifdef sun >-#ifdef _KERNEL >- /* >- * If zio data pages are being allocated out of a separate heap segment, >- * then enforce that the size of available vmem for this area remains >- * above about 1/32nd free. 
>- */ >- if (type == ARC_BUFC_DATA && zio_arena != NULL && >- vmem_size(zio_arena, VMEM_FREE) < >- (vmem_size(zio_arena, VMEM_ALLOC) >> 5)) >- return (1); >-#endif >-#endif /* sun */ >- > if (arc_reclaim_needed()) > return (1); > >@@ -3946,20 +3965,16 @@ static int > arc_memory_throttle(uint64_t reserve, uint64_t txg) > { > #ifdef _KERNEL >- uint64_t available_memory = >- ptoa((uintmax_t)vm_cnt.v_free_count + vm_cnt.v_cache_count); >+ uint64_t available_memory = ptob(freemem); > static uint64_t page_load = 0; > static uint64_t last_txg = 0; > >-#ifdef sun >-#if defined(__i386) >+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) > available_memory = >- MIN(available_memory, vmem_size(heap_arena, VMEM_FREE)); >+ MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE))); > #endif >-#endif /* sun */ > >- if (vm_cnt.v_free_count + vm_cnt.v_cache_count > >- (uint64_t)physmem * arc_lotsfree_percent / 100) >+ if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100) > return (0); > > if (txg > last_txg) { >@@ -3972,7 +3987,7 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg > * continue to let page writes occur as quickly as possible. 
> */ > if (curproc == pageproc) { >- if (page_load > available_memory / 4) >+ if (page_load > MAX(ptob(minfree), available_memory) / 4) > return (SET_ERROR(ERESTART)); > /* Note: reserve is inflated, so we deflate */ > page_load += reserve / 8; >@@ -4000,8 +4015,10 @@ arc_tempreserve_space(uint64_t reserve, uint64_t t > int error; > uint64_t anon_size; > >- if (reserve > arc_c/4 && !arc_no_grow) >+ if (reserve > arc_c/4 && !arc_no_grow) { > arc_c = MIN(arc_c_max, reserve * 4); >+ DTRACE_PROBE1(arc__set_reserve, uint64_t, arc_c); >+ } > if (reserve > arc_c) > return (SET_ERROR(ENOMEM)); > >@@ -4055,6 +4072,7 @@ arc_lowmem(void *arg __unused, int howto __unused) > mutex_enter(&arc_lowmem_lock); > mutex_enter(&arc_reclaim_thr_lock); > needfree = 1; >+ DTRACE_PROBE(arc__needfree); > cv_signal(&arc_reclaim_thr_cv); > > /* >Index: sys/vm/vm_pageout.c >=================================================================== >--- sys/vm/vm_pageout.c (revision 270875) >+++ sys/vm/vm_pageout.c (working copy) >@@ -76,6 +76,7 @@ > __FBSDID("$FreeBSD$"); > > #include "opt_vm.h" >+#include "opt_kdtrace.h" > #include <sys/param.h> > #include <sys/systm.h> > #include <sys/kernel.h> >@@ -89,6 +90,7 @@ __FBSDID("$FreeBSD$"); > #include <sys/racct.h> > #include <sys/resourcevar.h> > #include <sys/sched.h> >+#include <sys/sdt.h> > #include <sys/signalvar.h> > #include <sys/smp.h> > #include <sys/vnode.h> >@@ -133,6 +135,10 @@ static struct kproc_desc page_kp = { > SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, > &page_kp); > >+SDT_PROVIDER_DEFINE(vm); >+SDT_PROBE_DEFINE(vm, , , vm__lowmem_cache); >+SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan); >+ > #if !defined(NO_SWAPPING) > /* the kernel process "vm_daemon"*/ > static void vm_daemon(void); >@@ -667,6 +673,7 @@ vm_pageout_grow_cache(int tries, vm_paddr_t low, v > * may acquire locks and/or sleep, so they can only be invoked > * when "tries" is greater than zero. 
> */ >+ SDT_PROBE0(vm, , , vm__lowmem_cache); > EVENTHANDLER_INVOKE(vm_lowmem, 0); > > /* >@@ -899,7 +906,7 @@ vm_pageout_map_deactivate_pages(map, desired) > * pass 1 - Move inactive to cache or free > * pass 2 - Launder dirty pages > */ >-static void >+static void __used > vm_pageout_scan(struct vm_domain *vmd, int pass) > { > vm_page_t m, next; >@@ -920,6 +927,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) > /* > * Decrease registered cache sizes. > */ >+ SDT_PROBE0(vm, , , vm__lowmem_scan); > EVENTHANDLER_INVOKE(vm_lowmem, 0); > /* > * We do this explicitly after the caches have been
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 187594
:
140882
|
140883
|
140884
|
140885
|
140886
|
140887
|
140888
|
140889
|
140890
|
140891
|
140892
|
146178
|
146203
|
146249
|
146251
|
146287
|
146300
|
146373
|
146423
|
146424
|
146456
|
146816
|
146817
|
146851
|
146852
|
146854
|
146859
|
146861
|
146946
|
146947
|
146948
|
146949
|
147014
|
147068
|
147069
| 147070 |
147265
|
147274
|
147275
|
147276
|
147286
|
147459
|
147607
|
147609
|
147733
|
147738
|
147754
|
147815
|
152852
|
158809
|
159207
|
159688
|
159859
|
159905
|
161691
|
161692
|
161943
|
164051
|
174197
|
174198
|
174231
|
174232
|
174254
|
186818