FreeBSD Bugzilla – Attachment 147733 Details for
Bug 187594
[zfs] [patch] ZFS ARC behavior problem and fix
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
ARC reclaim refactor + uma clear down (against stable/10)
arc_cache_reclaim_uma_stable10.patch (text/plain), 26.12 KB, created by
Steven Hartland
on 2014-09-27 20:10:27 UTC
(
hide
)
Description:
ARC reclaim refactor + uma clear down (against stable/10)
Filename:
MIME Type:
Creator:
Steven Hartland
Created:
2014-09-27 20:10:27 UTC
Size:
26.12 KB
patch
obsolete
>Index: sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c >=================================================================== >--- sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c (revision 272005) >+++ sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c (working copy) >@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); > #include <sys/kmem.h> > #include <sys/debug.h> > #include <sys/mutex.h> >+#include <sys/sdt.h> > > #include <vm/vm_page.h> > #include <vm/vm_object.h> >@@ -133,13 +134,6 @@ kmem_size(void) > return (kmem_size_val); > } > >-uint64_t >-kmem_used(void) >-{ >- >- return (vmem_size(kmem_arena, VMEM_ALLOC)); >-} >- > static int > kmem_std_constructor(void *mem, int size __unused, void *private, int flags) > { >@@ -228,12 +222,45 @@ kmem_cache_reap_now(kmem_cache_t *cache) > } > > void >+kmem_cache_reap(kmem_cache_t *cache, uint64_t maxfree) >+{ >+ >+ if (cache->kc_zone != NULL && >+ uma_zone_free_size(cache->kc_zone) > maxfree) >+ zone_drain(cache->kc_zone); >+} >+ >+void > kmem_reap(void) > { > uma_reclaim(); > } >+ >+uint64_t >+kmem_cache_free_size(kmem_cache_t *cache) >+{ >+ uint64_t cachefree; >+ >+ cachefree = (cache->kc_zone == NULL) ? 0 : >+ uma_zone_free_size(cache->kc_zone); >+ >+ /* >+ * Manual probe as the return fbt probe never fires due to >+ * compiler tail call optimisation. 
>+ */ >+ DTRACE_PROBE2(kmem_cache_free_size, char *, cache->kc_name, uint64_t, >+ cachefree); >+ >+ return (cachefree); >+} >+ > #else > void >+kmem_cache_reap(kmem_cache_t *cache, uint64_t maxfree) >+{ >+} >+ >+void > kmem_cache_reap_now(kmem_cache_t *cache __unused) > { > } >@@ -242,6 +269,11 @@ void > kmem_reap(void) > { > } >+ >+uint64_t >+kmem_cache_free_size(kmem_cache_t *cache) >+{ >+} > #endif > > int >Index: sys/cddl/compat/opensolaris/sys/kmem.h >=================================================================== >--- sys/cddl/compat/opensolaris/sys/kmem.h (revision 272005) >+++ sys/cddl/compat/opensolaris/sys/kmem.h (working copy) >@@ -44,7 +44,7 @@ MALLOC_DECLARE(M_SOLARIS); > #define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1)) > > #define KM_SLEEP M_WAITOK >-#define KM_PUSHPAGE M_WAITOK >+#define KM_PUSHPAGE M_WAITOK|M_USE_RESERVE > #define KM_NOSLEEP M_NOWAIT > #define KM_NODEBUG M_NODUMP > #define KM_NORMALPRI 0 >@@ -66,7 +66,6 @@ typedef struct kmem_cache { > void *zfs_kmem_alloc(size_t size, int kmflags); > void zfs_kmem_free(void *buf, size_t size); > uint64_t kmem_size(void); >-uint64_t kmem_used(void); > kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align, > int (*constructor)(void *, void *, int), void (*destructor)(void *, void *), > void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags); >@@ -74,10 +73,15 @@ void kmem_cache_destroy(kmem_cache_t *cache); > void *kmem_cache_alloc(kmem_cache_t *cache, int flags); > void kmem_cache_free(kmem_cache_t *cache, void *buf); > void kmem_cache_reap_now(kmem_cache_t *cache); >+void kmem_cache_reap(kmem_cache_t *cache, uint64_t maxfree); > void kmem_reap(void); >+uint64_t kmem_cache_free_size(kmem_cache_t *cache); > int kmem_debugging(void); > void *calloc(size_t n, size_t s); > >+#define freemem (cnt.v_free_count + cnt.v_cache_count) >+#define minfree cnt.v_free_min >+#define heap_arena kmem_arena > #define kmem_alloc(size, kmflags) 
zfs_kmem_alloc((size), (kmflags)) > #define kmem_zalloc(size, kmflags) zfs_kmem_alloc((size), (kmflags) | M_ZERO) > #define kmem_free(buf, size) zfs_kmem_free((buf), (size)) >Index: sys/cddl/compat/opensolaris/sys/param.h >=================================================================== >--- sys/cddl/compat/opensolaris/sys/param.h (revision 272005) >+++ sys/cddl/compat/opensolaris/sys/param.h (working copy) >@@ -36,6 +36,7 @@ > > #ifdef _KERNEL > #define ptob(x) ((uint64_t)(x) << PAGE_SHIFT) >+#define btop(x) ((uint64_t)(x) >> PAGE_SHIFT) > #endif > > #endif >Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c >=================================================================== >--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 272005) >+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (working copy) >@@ -138,6 +138,7 @@ > #include <sys/sdt.h> > > #include <vm/vm_pageout.h> >+#include <machine/vmparam.h> > > #ifdef illumos > #ifndef _KERNEL >@@ -159,6 +160,15 @@ typedef enum arc_reclaim_strategy { > ARC_RECLAIM_CONS /* Conservative reclaim strategy */ > } arc_reclaim_strategy_t; > >+typedef enum arc_cache_reclaim_stragegy { >+ ARC_CACHE_RECLAIM_NOW, /* Immediate reclaim strategy */ >+ ARC_CACHE_RECLAIM_SIZE, /* Free size reclaim strategy */ >+ ARC_CACHE_RECLAIM_FORCE, /* Forced immediate reclaim strategy */ >+} arc_cache_reclaim_strategy_t; >+ >+/* When the last cache reclaim was processed. */ >+static clock_t cache_reclaim_last = 0; >+ > /* > * The number of iterations through arc_evict_*() before we > * drop & reacquire the lock. >@@ -193,9 +203,6 @@ extern int zfs_prefetch_disable; > */ > static boolean_t arc_warm; > >-/* >- * These tunables are for performance analysis. 
>- */ > uint64_t zfs_arc_max; > uint64_t zfs_arc_min; > uint64_t zfs_arc_meta_limit = 0; >@@ -204,7 +211,27 @@ int zfs_arc_shrink_shift = 0; > int zfs_arc_p_min_shift = 0; > int zfs_disable_dup_eviction = 0; > uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ >+u_int zfs_arc_free_target = 0; >+u_int zfs_arc_cache_target = 0; >+int zfs_arc_cache_period = 10; >+int zfs_arc_cache_partial = 0; >+int zfs_arc_cache_free_period = 300; >+uint64_t zfs_arc_cache_free_max = (1 <<24); /* 16MB */ > >+#ifdef _KERNEL >+static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); >+static int sysctl_vfs_zfs_arc_cache_target(SYSCTL_HANDLER_ARGS); >+ >+static void >+arc_target_init(void *unused __unused) >+{ >+ >+ zfs_arc_free_target = vm_pageout_wakeup_thresh; >+ zfs_arc_cache_target = (vm_pageout_wakeup_thresh / 2) * 3; >+} >+SYSINIT(arc_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, >+ arc_target_init, NULL); >+ > TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); > TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); > TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); >@@ -217,7 +244,75 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_ > SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, > &zfs_arc_average_blocksize, 0, > "ARC average blocksize"); >+SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_cache_reclaim_period, CTLFLAG_RWTUN, >+ &zfs_arc_cache_period, 0, >+ "Min number of seconds between ARC cache reclaims"); >+SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_cache_reclaim_partial, CTLFLAG_RWTUN, >+ &zfs_arc_cache_partial, 0, >+ "Enable ARC to perform partial cache reclaims"); >+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_cache_free_max, CTLFLAG_RWTUN, >+ &zfs_arc_cache_free_max, 0, >+ "Maximum free bytes in an ARC cache zone before reclaim will be triggered"); >+SYSCTL_UINT(_vfs_zfs, OID_AUTO, arc_cache_free_period, CTLFLAG_RWTUN, >+ &zfs_arc_cache_free_period, 0, >+ "Min number of seconds between ARC free size based cache reclaims"); >+/* >+ * We don't have a tunable 
for these sysctls due to their dependency on >+ * pagedaemon initialisation. >+ */ >+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, >+ CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int), >+ sysctl_vfs_zfs_arc_free_target, "IU", >+ "Desired number of free pages below which ARC triggers reclaim"); >+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_cache_target, >+ CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int), >+ sysctl_vfs_zfs_arc_cache_target, "IU", >+ "Desired number of free pages below which ARC triggers cache reclaim"); > >+ >+static int >+sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS) >+{ >+ u_int val; >+ int err; >+ >+ val = zfs_arc_free_target; >+ err = sysctl_handle_int(oidp, &val, 0, req); >+ if (err != 0 || req->newptr == NULL) >+ return (err); >+ >+ if (val < minfree) >+ return (EINVAL); >+ if (val > cnt.v_page_count) >+ return (EINVAL); >+ >+ zfs_arc_free_target = val; >+ >+ return (0); >+} >+ >+static int >+sysctl_vfs_zfs_arc_cache_target(SYSCTL_HANDLER_ARGS) >+{ >+ u_int val; >+ int err; >+ >+ val = zfs_arc_cache_target; >+ err = sysctl_handle_int(oidp, &val, 0, req); >+ if (err != 0 || req->newptr == NULL) >+ return (err); >+ >+ if (val < minfree) >+ return (EINVAL); >+ if (val > cnt.v_page_count) >+ return (EINVAL); >+ >+ zfs_arc_cache_target = val; >+ >+ return (0); >+} >+#endif >+ > /* > * Note that buffers can be in one of 6 states: > * ARC_anon - anonymous (discussed below) >@@ -592,6 +687,13 @@ static void arc_evict_ghost(arc_state_t *state, ui > static void arc_buf_watch(arc_buf_t *buf); > #endif /* illumos */ > >+static uint64_t arc_cache_free(void); >+static boolean_t arc_cache_reclaim_needed(uint64_t size); >+static boolean_t arc_cache_reclaim(uint64_t size, >+ arc_cache_reclaim_strategy_t strat); >+static boolean_t arc_cache_reclaim_strat(kmem_cache_t *cache, uint64_t size, >+ arc_cache_reclaim_strategy_t strat); >+ > static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab); > > #define 
GHOST_STATE(state) \ >@@ -2421,6 +2523,7 @@ arc_flush(spa_t *spa) > void > arc_shrink(void) > { >+ > if (arc_c > arc_c_min) { > uint64_t to_free; > >@@ -2429,6 +2532,9 @@ arc_shrink(void) > #else > to_free = arc_c >> arc_shrink_shift; > #endif >+ DTRACE_PROBE4(arc__shrink, uint64_t, arc_c, uint64_t, >+ arc_c_min, uint64_t, arc_p, uint64_t, to_free); >+ > if (arc_c > arc_c_min + to_free) > atomic_add_64(&arc_c, -to_free); > else >@@ -2439,12 +2545,19 @@ arc_shrink(void) > arc_c = MAX(arc_size, arc_c_min); > if (arc_p > arc_c) > arc_p = (arc_c >> 1); >+ >+ DTRACE_PROBE2(arc__shrunk, uint64_t, arc_c, uint64_t, >+ arc_p); >+ > ASSERT(arc_c >= arc_c_min); > ASSERT((int64_t)arc_p >= 0); > } > >- if (arc_size > arc_c) >+ if (arc_size > arc_c) { >+ DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size, >+ uint64_t, arc_c); > arc_adjust(); >+ } > } > > static int needfree = 0; >@@ -2454,16 +2567,26 @@ arc_reclaim_needed(void) > { > > #ifdef _KERNEL >+ if (arc_size <= arc_c_min) { >+ DTRACE_PROBE2(arc__reclaim_min, uint64_t, arc_size, >+ uint64_t, arc_c_min); >+ return (0); >+ } > >- if (needfree) >+ if (needfree) { >+ DTRACE_PROBE(arc__reclaim_needfree); > return (1); >+ } > > /* > * Cooperate with pagedaemon when it's time for it to scan > * and reclaim some pages. > */ >- if (vm_paging_needed()) >+ if (freemem < zfs_arc_free_target) { >+ DTRACE_PROBE2(arc__reclaim_freemem, uint64_t, >+ freemem, uint64_t, zfs_arc_free_target); > return (1); >+ } > > #ifdef sun > /* >@@ -2491,8 +2614,19 @@ arc_reclaim_needed(void) > if (availrmem < swapfs_minfree + swapfs_reserve + extra) > return (1); > >-#if defined(__i386) > /* >+ * Check that we have enough availrmem that memory locking (e.g., via >+ * mlock(3C) or memcntl(2)) can still succeed. (pages_pp_maximum >+ * stores the number of pages that cannot be locked; when availrmem >+ * drops below pages_pp_maximum, page locking mechanisms such as >+ * page_pp_lock() will fail.) 
>+ */ >+ if (availrmem <= pages_pp_maximum) >+ return (1); >+ >+#endif /* sun */ >+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) >+ /* > * If we're on an i386 platform, it's possible that we'll exhaust the > * kernel heap space before we ever run out of available physical > * memory. Most checks of the size of the heap_area compare against >@@ -2503,26 +2637,43 @@ arc_reclaim_needed(void) > * heap is allocated. (Or, in the calculation, if less than 1/4th is > * free) > */ >- if (btop(vmem_size(heap_arena, VMEM_FREE)) < >- (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2)) >+ if (vmem_size(heap_arena, VMEM_FREE) < >+ (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) { >+ DTRACE_PROBE2(arc__reclaim_used, uint64_t, >+ vmem_size(heap_arena, VMEM_FREE), uint64_t, >+ (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2); > return (1); >+ } > #endif >-#else /* !sun */ >- if (kmem_used() > (kmem_size() * 3) / 4) >+#ifdef sun >+ /* >+ * If zio data pages are being allocated out of a separate heap segment, >+ * then enforce that the size of available vmem for this arena remains >+ * above about 1/16th free. >+ * >+ * Note: The 1/16th arena free requirement was put in place >+ * to aggressively evict memory from the arc in order to avoid >+ * memory fragmentation issues. 
>+ */ >+ if (zio_arena != NULL && >+ vmem_size(zio_arena, VMEM_FREE) < >+ (vmem_size(zio_arena, VMEM_ALLOC) >> 4)) > return (1); > #endif /* sun */ >- >-#else >+#else /* _KERNEL */ > if (spa_get_random(100) == 0) > return (1); >-#endif >+#endif /* _KERNEL */ >+ DTRACE_PROBE(arc__reclaim_no); >+ > return (0); > } > > extern kmem_cache_t *zio_buf_cache[]; > extern kmem_cache_t *zio_data_buf_cache[]; >+extern kmem_cache_t *range_seg_cache; > >-static void >+static void __noinline > arc_kmem_reap_now(arc_reclaim_strategy_t strat) > { > size_t i; >@@ -2529,6 +2680,7 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat) > kmem_cache_t *prev_cache = NULL; > kmem_cache_t *prev_data_cache = NULL; > >+ DTRACE_PROBE(arc__kmem_reap_start); > #ifdef _KERNEL > if (arc_meta_used >= arc_meta_limit) { > /* >@@ -2537,7 +2689,7 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat) > */ > dnlc_reduce_cache((void *)(uintptr_t)arc_reduce_dnlc_percent); > } >-#if defined(__i386) >+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) > /* > * Reclaim unused memory from all kmem caches. > */ >@@ -2552,20 +2704,127 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat) > if (strat == ARC_RECLAIM_AGGR) > arc_shrink(); > >+ (void) arc_cache_reclaim(0, ARC_CACHE_RECLAIM_FORCE); >+ >+#ifdef sun >+ /* >+ * Ask the vmem areana to reclaim unused memory from its >+ * quantum caches. 
>+ */ >+ if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR) >+ vmem_qcache_reap(zio_arena); >+#endif >+out: >+ DTRACE_PROBE(arc__kmem_reap_end); >+} >+ >+ >+static boolean_t >+arc_cache_reclaim_needed(uint64_t size) >+{ >+ >+ if (zfs_arc_cache_target && freemem < zfs_arc_cache_target + btop(size)) { >+ DTRACE_PROBE1(arc_cache_reclaim_needed, int, B_TRUE); >+ return (B_TRUE); >+ } >+ >+ DTRACE_PROBE1(arc_cache_reclaim_needed, int, B_FALSE); >+ return (B_FALSE); >+} >+ >+static boolean_t >+arc_cache_reclaim_strat(kmem_cache_t *cache, uint64_t size, >+ arc_cache_reclaim_strategy_t strat) >+{ >+ >+ switch(strat) { >+ case ARC_CACHE_RECLAIM_NOW: >+ case ARC_CACHE_RECLAIM_FORCE: >+ kmem_cache_reap_now(cache); >+ if (zfs_arc_cache_partial && !arc_cache_reclaim_needed(size)) >+ return (B_TRUE); >+ break; >+ default: >+ kmem_cache_reap(cache, zfs_arc_cache_free_max); >+ } >+ >+ return (B_FALSE); >+} >+ >+static boolean_t >+arc_cache_reclaim(uint64_t size, arc_cache_reclaim_strategy_t strat) >+{ >+ int i; >+ clock_t now; >+ kmem_cache_t *prev_cache, *prev_data_cache; >+ >+ now = ddi_get_lbolt(); >+ DTRACE_PROBE3(arc_cache_reclaim_test, int, strat, int64_t, now, >+ int64_t, cache_reclaim_last); >+ if (now - cache_reclaim_last > (zfs_arc_cache_period * hz)) >+ return (B_FALSE); >+ >+ DTRACE_PROBE1(arc_cache_reclaim, int, strat); >+ > for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) { > if (zio_buf_cache[i] != prev_cache) { > prev_cache = zio_buf_cache[i]; >- kmem_cache_reap_now(zio_buf_cache[i]); >+ if (arc_cache_reclaim_strat(zio_buf_cache[i], size, >+ strat)) { >+ return (B_TRUE); >+ } >+ > } > if (zio_data_buf_cache[i] != prev_data_cache) { > prev_data_cache = zio_data_buf_cache[i]; >- kmem_cache_reap_now(zio_data_buf_cache[i]); >+ if (arc_cache_reclaim_strat(zio_data_buf_cache[i], >+ size, strat)) >+ return (B_TRUE); > } > } >- kmem_cache_reap_now(buf_cache); >- kmem_cache_reap_now(hdr_cache); >+ if (arc_cache_reclaim_strat(range_seg_cache, size, strat)) >+ 
return (B_TRUE); >+ >+ if (arc_cache_reclaim_strat(buf_cache, size, strat)) >+ return (B_TRUE); >+ >+ arc_cache_reclaim_strat(hdr_cache, size, strat); >+ >+ cache_reclaim_last = ddi_get_lbolt(); >+ >+ if (arc_cache_reclaim_needed(size)) >+ return (B_FALSE); >+ >+ return (B_TRUE); > } > >+static uint64_t >+arc_cache_free(void) >+{ >+ int i; >+ uint64_t cachefree; >+ kmem_cache_t *prev_cache, *prev_data_cache; >+ >+ cachefree = kmem_cache_free_size(buf_cache) + >+ kmem_cache_free_size(hdr_cache); >+ >+ prev_cache = NULL; >+ prev_data_cache = NULL; >+ >+ for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) { >+ if (zio_buf_cache[i] != prev_cache) { >+ prev_cache = zio_buf_cache[i]; >+ cachefree += kmem_cache_free_size(zio_buf_cache[i]); >+ } >+ if (zio_data_buf_cache[i] != prev_data_cache) { >+ prev_data_cache = zio_data_buf_cache[i]; >+ cachefree += kmem_cache_free_size(zio_data_buf_cache[i]); >+ } >+ } >+ >+ return (cachefree); >+} >+ > static void > arc_reclaim_thread(void *dummy __unused) > { >@@ -2577,10 +2836,28 @@ arc_reclaim_thread(void *dummy __unused) > > mutex_enter(&arc_reclaim_thr_lock); > while (arc_thread_exit == 0) { >+ DTRACE_PROBE(arc__reclaim_thread); >+ if (arc_cache_reclaim_needed(0)) { >+ (void) arc_cache_reclaim(0, ARC_CACHE_RECLAIM_NOW); >+ } else { >+ clock_t now; >+ >+ now = ddi_get_lbolt(); >+ DTRACE_PROBE2(arc__reclaim_check, int64_t, now, >+ int64_t, cache_reclaim_last); >+ if (now - cache_reclaim_last > >+ (zfs_arc_cache_free_period * hz)) { >+ (void) arc_cache_reclaim(0, >+ ARC_CACHE_RECLAIM_SIZE); >+ } >+ } >+ > if (arc_reclaim_needed()) { >- >+ DTRACE_PROBE1(arc__caches_free, uint64_t, >+ arc_cache_free()); > if (arc_no_grow) { > if (last_reclaim == ARC_RECLAIM_CONS) { >+ DTRACE_PROBE(arc__reclaim_aggr_no_grow); > last_reclaim = ARC_RECLAIM_AGGR; > } else { > last_reclaim = ARC_RECLAIM_CONS; >@@ -2588,6 +2865,7 @@ arc_reclaim_thread(void *dummy __unused) > } else { > arc_no_grow = TRUE; > last_reclaim = ARC_RECLAIM_AGGR; >+ 
DTRACE_PROBE(arc__reclaim_aggr); > membar_producer(); > } > >@@ -2602,6 +2880,7 @@ arc_reclaim_thread(void *dummy __unused) > */ > arc_no_grow = TRUE; > last_reclaim = ARC_RECLAIM_AGGR; >+ DTRACE_PROBE(arc__reclaim_aggr_needfree); > } > arc_kmem_reap_now(last_reclaim); > arc_warm = B_TRUE; >@@ -2618,6 +2897,7 @@ arc_reclaim_thread(void *dummy __unused) > #ifdef _KERNEL > if (needfree) { > needfree = 0; >+ DTRACE_PROBE(arc__clear_needfree); > wakeup(&needfree); > } > #endif >@@ -2692,6 +2972,7 @@ arc_adapt(int bytes, arc_state_t *state) > * cache size, increment the target cache size > */ > if (arc_size > arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) { >+ DTRACE_PROBE1(arc__inc_adapt, int, bytes); > atomic_add_64(&arc_c, (int64_t)bytes); > if (arc_c > arc_c_max) > arc_c = arc_c_max; >@@ -2713,20 +2994,6 @@ arc_evict_needed(arc_buf_contents_t type) > if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit) > return (1); > >-#ifdef sun >-#ifdef _KERNEL >- /* >- * If zio data pages are being allocated out of a separate heap segment, >- * then enforce that the size of available vmem for this area remains >- * above about 1/32nd free. 
>- */ >- if (type == ARC_BUFC_DATA && zio_arena != NULL && >- vmem_size(zio_arena, VMEM_FREE) < >- (vmem_size(zio_arena, VMEM_ALLOC) >> 5)) >- return (1); >-#endif >-#endif /* sun */ >- > if (arc_reclaim_needed()) > return (1); > >@@ -2763,6 +3030,9 @@ arc_get_data_buf(arc_buf_t *buf) > uint64_t size = buf->b_hdr->b_size; > arc_buf_contents_t type = buf->b_hdr->b_type; > >+ if (arc_cache_reclaim_needed(size)) >+ (void) arc_cache_reclaim(size, ARC_CACHE_RECLAIM_NOW); >+ > arc_adapt(size, state); > > /* >@@ -3885,20 +4155,16 @@ static int > arc_memory_throttle(uint64_t reserve, uint64_t txg) > { > #ifdef _KERNEL >- uint64_t available_memory = >- ptoa((uintmax_t)cnt.v_free_count + cnt.v_cache_count); >+ uint64_t available_memory = ptob(freemem); > static uint64_t page_load = 0; > static uint64_t last_txg = 0; > >-#ifdef sun >-#if defined(__i386) >+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) > available_memory = >- MIN(available_memory, vmem_size(heap_arena, VMEM_FREE)); >+ MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE))); > #endif >-#endif /* sun */ > >- if (cnt.v_free_count + cnt.v_cache_count > >- (uint64_t)physmem * arc_lotsfree_percent / 100) >+ if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100) > return (0); > > if (txg > last_txg) { >@@ -3911,7 +4177,7 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg > * continue to let page writes occur as quickly as possible. 
> */ > if (curproc == pageproc) { >- if (page_load > available_memory / 4) >+ if (page_load > MAX(ptob(minfree), available_memory) / 4) > return (SET_ERROR(ERESTART)); > /* Note: reserve is inflated, so we deflate */ > page_load += reserve / 8; >@@ -3939,8 +4205,10 @@ arc_tempreserve_space(uint64_t reserve, uint64_t t > int error; > uint64_t anon_size; > >- if (reserve > arc_c/4 && !arc_no_grow) >+ if (reserve > arc_c/4 && !arc_no_grow) { > arc_c = MIN(arc_c_max, reserve * 4); >+ DTRACE_PROBE1(arc__set_reserve, uint64_t, arc_c); >+ } > if (reserve > arc_c) > return (SET_ERROR(ENOMEM)); > >@@ -3994,6 +4262,7 @@ arc_lowmem(void *arg __unused, int howto __unused) > mutex_enter(&arc_lowmem_lock); > mutex_enter(&arc_reclaim_thr_lock); > needfree = 1; >+ DTRACE_PROBE(arc__needfree); > cv_signal(&arc_reclaim_thr_cv); > > /* >Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/range_tree.c >=================================================================== >--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/range_tree.c (revision 272005) >+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/range_tree.c (working copy) >@@ -33,7 +33,7 @@ > #include <sys/zio.h> > #include <sys/range_tree.h> > >-static kmem_cache_t *range_seg_cache; >+kmem_cache_t *range_seg_cache; > > void > range_tree_init(void) >Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c >=================================================================== >--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c (revision 272005) >+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c (working copy) >@@ -312,6 +312,8 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) > ASSERT(MUTEX_HELD(&vq->vq_lock)); > ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); > avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio); >+ DTRACE_PROBE3(vdev_queue, vdev_queue_t *, vq, zio_t *, zio, uint64_t, >+ avl_numnodes(&vq->vq_class[zio->io_priority].vqc_queued_tree)); > > #ifdef illumos 
> mutex_enter(&spa->spa_iokstat_lock); >Index: sys/vm/uma.h >=================================================================== >--- sys/vm/uma.h (revision 272005) >+++ sys/vm/uma.h (working copy) >@@ -636,6 +636,17 @@ int uma_zone_exhausted(uma_zone_t zone); > int uma_zone_exhausted_nolock(uma_zone_t zone); > > /* >+ * Used to determine the amount of memory consumed by a zone's free space. >+ * >+ * Arguments: >+ * zone The zone to determine the free space of. >+ * >+ * Returns: >+ * uint64_t The amount of memory consumed by the zone's free space. >+ */ >+uint64_t uma_zone_free_size(uma_zone_t zone); >+ >+/* > * Common UMA_ZONE_PCPU zones. > */ > extern uma_zone_t pcpu_zone_64; >Index: sys/vm/uma_core.c >=================================================================== >--- sys/vm/uma_core.c (revision 272005) >+++ sys/vm/uma_core.c (working copy) >@@ -3312,6 +3312,33 @@ uma_print_zone(uma_zone_t zone) > } > } > >+uint64_t >+uma_zone_free_size(uma_zone_t zone) >+{ >+ int cpu; >+ uint64_t cachefree; >+ uma_bucket_t bucket; >+ uma_cache_t cache; >+ >+ cachefree = 0; >+ >+ ZONE_LOCK(zone); >+ LIST_FOREACH(bucket, &zone->uz_buckets, ub_link) >+ cachefree += (uint64_t)bucket->ub_cnt; >+ >+ CPU_FOREACH(cpu) { >+ cache = &zone->uz_cpu[cpu]; >+ if (cache->uc_allocbucket != NULL) >+ cachefree += (uint64_t)cache->uc_allocbucket->ub_cnt; >+ if (cache->uc_freebucket != NULL) >+ cachefree += (uint64_t)cache->uc_freebucket->ub_cnt; >+ } >+ cachefree *= zone->uz_size; >+ ZONE_UNLOCK(zone); >+ >+ return (cachefree); >+} >+ > #ifdef DDB > /* > * Generate statistics across both the zone and its per-cpu cache's. 
Return >Index: sys/vm/vm_pageout.c >=================================================================== >--- sys/vm/vm_pageout.c (revision 272005) >+++ sys/vm/vm_pageout.c (working copy) >@@ -76,6 +76,7 @@ > __FBSDID("$FreeBSD$"); > > #include "opt_vm.h" >+#include "opt_kdtrace.h" > #include <sys/param.h> > #include <sys/systm.h> > #include <sys/kernel.h> >@@ -89,6 +90,7 @@ __FBSDID("$FreeBSD$"); > #include <sys/racct.h> > #include <sys/resourcevar.h> > #include <sys/sched.h> >+#include <sys/sdt.h> > #include <sys/signalvar.h> > #include <sys/smp.h> > #include <sys/vnode.h> >@@ -115,10 +117,14 @@ __FBSDID("$FreeBSD$"); > > /* the kernel process "vm_pageout"*/ > static void vm_pageout(void); >+static void vm_pageout_init(void); > static int vm_pageout_clean(vm_page_t); > static void vm_pageout_scan(struct vm_domain *vmd, int pass); > static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass); > >+SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, >+ NULL); >+ > struct proc *pageproc; > > static struct kproc_desc page_kp = { >@@ -126,9 +132,13 @@ static struct kproc_desc page_kp = { > vm_pageout, > &pageproc > }; >-SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, >+SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, > &page_kp); > >+SDT_PROVIDER_DEFINE(vm); >+SDT_PROBE_DEFINE(vm, , , vm__lowmem_cache); >+SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan); >+ > #if !defined(NO_SWAPPING) > /* the kernel process "vm_daemon"*/ > static void vm_daemon(void); >@@ -663,6 +673,7 @@ vm_pageout_grow_cache(int tries, vm_paddr_t low, v > * may acquire locks and/or sleep, so they can only be invoked > * when "tries" is greater than zero. > */ >+ SDT_PROBE0(vm, , , vm__lowmem_cache); > EVENTHANDLER_INVOKE(vm_lowmem, 0); > > /* >@@ -921,10 +932,11 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) > * some. We rate limit to avoid thrashing. 
> */ > if (vmd == &vm_dom[0] && pass > 0 && >- lowmem_ticks + (lowmem_period * hz) < ticks) { >+ (ticks - lowmem_ticks) / hz >= lowmem_period) { > /* > * Decrease registered cache sizes. > */ >+ SDT_PROBE0(vm, , , vm__lowmem_scan); > EVENTHANDLER_INVOKE(vm_lowmem, 0); > /* > * We do this explicitly after the caches have been >@@ -1650,15 +1662,11 @@ vm_pageout_worker(void *arg) > } > > /* >- * vm_pageout is the high level pageout daemon. >+ * vm_pageout_init initialises basic pageout daemon settings. > */ > static void >-vm_pageout(void) >+vm_pageout_init(void) > { >-#if MAXMEMDOM > 1 >- int error, i; >-#endif >- > /* > * Initialize some paging parameters. > */ >@@ -1704,7 +1712,18 @@ static void > /* XXX does not really belong here */ > if (vm_page_max_wired == 0) > vm_page_max_wired = cnt.v_free_count / 3; >+} > >+/* >+ * vm_pageout is the high level pageout daemon. >+ */ >+static void >+vm_pageout(void) >+{ >+#if MAXMEMDOM > 1 >+ int error, i; >+#endif >+ > swap_pager_swap_init(); > #if MAXMEMDOM > 1 > for (i = 1; i < vm_ndomains; i++) {
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 187594
:
140882
|
140883
|
140884
|
140885
|
140886
|
140887
|
140888
|
140889
|
140890
|
140891
|
140892
|
146178
|
146203
|
146249
|
146251
|
146287
|
146300
|
146373
|
146423
|
146424
|
146456
|
146816
|
146817
|
146851
|
146852
|
146854
|
146859
|
146861
|
146946
|
146947
|
146948
|
146949
|
147014
|
147068
|
147069
|
147070
|
147265
|
147274
|
147275
|
147276
|
147286
|
147459
|
147607
|
147609
|
147733
|
147738
|
147754
|
147815
|
152852
|
158809
|
159207
|
159688
|
159859
|
159905
|
161691
|
161692
|
161943
|
164051
|
174197
|
174198
|
174231
|
174232
|
174254
|
186818