FreeBSD Bugzilla – Attachment 146813 Details for Bug 191510: [zfs] ZFS doesn't use all available memory
[patch] arc reclaim refactor (against releng/9.3)

Filename:  arc-reclaim-93.patch
MIME Type: text/plain
Creator:   Steven Hartland
Created:   2014-09-04 18:25:07 UTC
Size:      9.46 KB
Flags:     patch, obsolete
Index: sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
===================================================================
--- sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c	(revision 271017)
+++ sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c	(working copy)
@@ -132,12 +132,25 @@ kmem_size(void)
 }
 
 uint64_t
-kmem_used(void)
+kmem_map_used(void)
 {
 
 	return (kmem_map->size);
 }
 
+uint64_t
+kmem_map_free(void)
+{
+	uint64_t size;
+
+	vm_map_lock_read(kmem_map);
+	size = kmem_map->root != NULL ? kmem_map->root->max_free :
+	    kmem_map->max_offset - kmem_map->min_offset;
+	vm_map_unlock_read(kmem_map);
+
+	return (size);
+}
+
 static int
 kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
 {
Index: sys/cddl/compat/opensolaris/sys/kmem.h
===================================================================
--- sys/cddl/compat/opensolaris/sys/kmem.h	(revision 271017)
+++ sys/cddl/compat/opensolaris/sys/kmem.h	(working copy)
@@ -66,7 +66,8 @@ typedef struct kmem_cache {
 void *zfs_kmem_alloc(size_t size, int kmflags);
 void zfs_kmem_free(void *buf, size_t size);
 uint64_t kmem_size(void);
-uint64_t kmem_used(void);
+uint64_t kmem_map_used(void);
+uint64_t kmem_map_free(void);
 kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
     int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
     void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
@@ -78,6 +79,8 @@ void kmem_reap(void);
 int kmem_debugging(void);
 void *calloc(size_t n, size_t s);
 
+#define	freemem		(cnt.v_free_count + cnt.v_cache_count)
+#define	minfree		cnt.v_free_min
 #define	kmem_alloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags))
 #define	kmem_zalloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags) | M_ZERO)
 #define	kmem_free(buf, size)	zfs_kmem_free((buf), (size))
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	(revision 271017)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	(working copy)
@@ -193,9 +193,6 @@ extern int zfs_prefetch_disable;
  */
 static boolean_t arc_warm;
 
-/*
- * These tunables are for performance analysis.
- */
 uint64_t zfs_arc_max;
 uint64_t zfs_arc_min;
 uint64_t zfs_arc_meta_limit = 0;
@@ -203,7 +200,21 @@ int zfs_arc_grow_retry = 0;
 int zfs_arc_shrink_shift = 0;
 int zfs_arc_p_min_shift = 0;
 int zfs_disable_dup_eviction = 0;
+u_int zfs_arc_free_target = (1 << 19); /* default before pagedaemon init only */
 
+static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS);
+
+#ifdef _KERNEL
+static void
+arc_free_target_init(void *unused __unused)
+{
+
+	zfs_arc_free_target = cnt.v_free_reserved + cnt.v_cache_min;
+}
+SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
+    arc_free_target_init, NULL);
+#endif
+
 TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
 TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
 TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
@@ -214,6 +225,36 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_
     "Minimum ARC size");
 
 /*
+ * We don't have a tunable for arc_free_target due to the dependency on
+ * pagedaemon initialisation.
+ */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
+    CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int),
+    sysctl_vfs_zfs_arc_free_target, "IU",
+    "Desired number of free pages below which ARC triggers reclaim");
+
+static int
+sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS)
+{
+	u_int val;
+	int err;
+
+	val = zfs_arc_free_target;
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val < minfree)
+		return (EINVAL);
+	if (val > cnt.v_page_count)
+		return (EINVAL);
+
+	zfs_arc_free_target = val;
+
+	return (0);
+}
+
+/*
  * Note that buffers can be in one of 6 states:
  *	ARC_anon	- anonymous (discussed below)
  *	ARC_mru		- recently used, currently cached
@@ -2405,9 +2446,12 @@ arc_flush(spa_t *spa)
 void
 arc_shrink(void)
 {
+
 	if (arc_c > arc_c_min) {
 		uint64_t to_free;
 
+		DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t,
+		    arc_c_min);
 #ifdef _KERNEL
 		to_free = arc_c >> arc_shrink_shift;
 #else
@@ -2427,8 +2471,11 @@ arc_shrink(void)
 		ASSERT((int64_t)arc_p >= 0);
 	}
 
-	if (arc_size > arc_c)
+	if (arc_size > arc_c) {
+		DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size,
+		    uint64_t, arc_c);
 		arc_adjust();
+	}
 }
 
 static int needfree = 0;
@@ -2439,15 +2486,20 @@ arc_reclaim_needed(void)
 
 #ifdef _KERNEL
 
-	if (needfree)
+	if (needfree) {
+		DTRACE_PROBE(arc__reclaim_needfree);
 		return (1);
+	}
 
 	/*
 	 * Cooperate with pagedaemon when it's time for it to scan
 	 * and reclaim some pages.
 	 */
-	if (vm_paging_needed())
+	if (freemem < zfs_arc_free_target) {
+		DTRACE_PROBE2(arc__reclaim_freetarget, uint64_t,
+		    freemem, uint64_t, zfs_arc_free_target);
 		return (1);
+	}
 
 #ifdef sun
 	/*
@@ -2487,19 +2539,41 @@ arc_reclaim_needed(void)
 	 * heap is allocated. (Or, in the calculation, if less than 1/4th is
 	 * free)
 	 */
-	if (btop(vmem_size(heap_arena, VMEM_FREE)) <
-	    (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
+	if (vmem_size(heap_arena, VMEM_FREE) <
+	    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2))
 		return (1);
 #endif
+
+	/*
+	 * If zio data pages are being allocated out of a separate heap segment,
+	 * then enforce that the size of available vmem for this arena remains
+	 * above about 1/16th free.
+	 *
+	 * Note: The 1/16th arena free requirement was put in place
+	 * to aggressively evict memory from the arc in order to avoid
+	 * memory fragmentation issues.
+	 */
+	if (zio_arena != NULL &&
+	    vmem_size(zio_arena, VMEM_FREE) <
+	    (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
+		return (1);
 #else	/* !sun */
-	if (kmem_used() > (kmem_size() * 3) / 4)
+#ifndef UMA_MD_SMALL_ALLOC
+	/* i386 has KVA limits that the raw page counts above don't consider */
+	if (kmem_map_used() > (kmem_size() * 3) / 4) {
+		DTRACE_PROBE2(arc__reclaim_used, uint64_t,
+		    kmem_map_used(), uint64_t, (kmem_size() * 3) / 4);
 		return (1);
+	}
+#endif
 #endif	/* sun */
 
-#else
+#else	/* !_KERNEL */
 	if (spa_get_random(100) == 0)
 		return (1);
-#endif
+#endif	/* _KERNEL */
+	DTRACE_PROBE(arc__reclaim_no);
+
 	return (0);
 }
 
@@ -2697,20 +2771,6 @@ arc_evict_needed(arc_buf_contents_t type)
 	if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
 		return (1);
 
-#ifdef sun
-#ifdef _KERNEL
-	/*
-	 * If zio data pages are being allocated out of a separate heap segment,
-	 * then enforce that the size of available vmem for this area remains
-	 * above about 1/32nd free.
-	 */
-	if (type == ARC_BUFC_DATA && zio_arena != NULL &&
-	    vmem_size(zio_arena, VMEM_FREE) <
-	    (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
-		return (1);
-#endif
-#endif	/* sun */
-
 	if (arc_reclaim_needed())
 		return (1);
 
@@ -3876,8 +3936,7 @@ static int
 arc_memory_throttle(uint64_t reserve, uint64_t txg)
 {
 #ifdef _KERNEL
-	uint64_t available_memory =
-	    ptoa((uintmax_t)cnt.v_free_count + cnt.v_cache_count);
+	uint64_t available_memory = ptob(freemem);
 	static uint64_t page_load = 0;
 	static uint64_t last_txg = 0;
 
@@ -3886,10 +3945,13 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg
 	available_memory =
 	    MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
 #endif
+#else	/* sun */
+#ifndef UMA_MD_SMALL_ALLOC
+	available_memory = MIN(available_memory, kmem_map_free());
+#endif
 #endif	/* sun */
 
-	if (cnt.v_free_count + cnt.v_cache_count >
-	    (uint64_t)physmem * arc_lotsfree_percent / 100)
+	if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100)
 		return (0);
 
 	if (txg > last_txg) {
@@ -3902,7 +3964,7 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg
 	 * continue to let page writes occur as quickly as possible.
 	 */
 	if (curproc == pageproc) {
-		if (page_load > available_memory / 4)
+		if (page_load > MAX(ptob(minfree), available_memory) / 4)
 			return (SET_ERROR(ERESTART));
 		/* Note: reserve is inflated, so we deflate */
 		page_load += reserve / 8;
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c	(revision 271017)
+++ sys/vm/vm_pageout.c	(working copy)
@@ -112,9 +112,13 @@ __FBSDID("$FreeBSD$");
 
 /* the kernel process "vm_pageout"*/
 static void vm_pageout(void);
+static void vm_pageout_init(void);
 static int vm_pageout_clean(vm_page_t);
 static void vm_pageout_scan(int pass);
 
+SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init,
+    NULL);
+
 struct proc *pageproc;
 
 static struct kproc_desc page_kp = {
@@ -122,7 +126,7 @@ static struct kproc_desc page_kp = {
 	vm_pageout,
 	&pageproc
 };
-SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start,
+SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start,
     &page_kp);
 
 #if !defined(NO_SWAPPING)
@@ -1506,13 +1510,11 @@ vm_pageout_page_stats()
 }
 
 /*
- * vm_pageout is the high level pageout daemon.
+ * vm_pageout_init initialises basic pageout daemon settings.
  */
 static void
-vm_pageout()
+vm_pageout_init()
 {
-	int error, pass;
-
 	/*
 	 * Initialize some paging parameters.
 	 */
@@ -1579,7 +1581,16 @@ static void
 		vm_pageout_stats_interval = 5;
 	if (vm_pageout_full_stats_interval == 0)
 		vm_pageout_full_stats_interval = vm_pageout_stats_interval * 4;
+}
 
+/*
+ * vm_pageout is the high level pageout daemon.
+ */
+static void
+vm_pageout()
+{
+	int error, pass;
+
 	swap_pager_swap_init();
 	pass = 0;
 	/*
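For context, a minimal userland sketch (not part of the attachment; the program name and error handling are illustrative only) of how the vfs.zfs.arc_free_target sysctl introduced by this patch could be read and adjusted via sysctlbyname(3). It assumes a kernel built with the patch applied; on a stock releng/9.3 kernel the OID does not exist and the read fails.

/*
 * Illustrative only: query and optionally set vfs.zfs.arc_free_target,
 * the free-page threshold below which the ARC starts reclaiming.
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
	u_int target;
	size_t len = sizeof(target);

	/* Read the current free-page target the ARC reclaims against. */
	if (sysctlbyname("vfs.zfs.arc_free_target", &target, &len,
	    NULL, 0) != 0)
		err(1, "sysctlbyname(read)");
	printf("vfs.zfs.arc_free_target = %u pages\n", target);

	/* Optionally set a new target (in pages) given on the command line. */
	if (argc > 1) {
		target = (u_int)strtoul(argv[1], NULL, 10);
		if (sysctlbyname("vfs.zfs.arc_free_target", NULL, NULL,
		    &target, sizeof(target)) != 0)
			err(1, "sysctlbyname(write)");
	}
	return (0);
}

Note that the patch's sysctl handler rejects values below minfree or above cnt.v_page_count with EINVAL, so out-of-range writes show up here as a failed sysctlbyname(write) call.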