Index: sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
===================================================================
--- sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c	(revision 270875)
+++ sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c	(working copy)
@@ -126,42 +126,6 @@ kmem_size_init(void *unused __unused)
 }
 SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL);
 
-/*
- * The return values from kmem_free_* are only valid once the pagedaemon
- * has been initialised, before then they return 0.
- *
- * To ensure the returns are valid the caller can use a SYSINIT with
- * subsystem set to SI_SUB_KTHREAD_PAGE and an order of at least
- * SI_ORDER_SECOND.
- */
-u_int
-kmem_free_target(void)
-{
-
-	return (vm_cnt.v_free_target);
-}
-
-u_int
-kmem_free_min(void)
-{
-
-	return (vm_cnt.v_free_min);
-}
-
-u_int
-kmem_free_count(void)
-{
-
-	return (vm_cnt.v_free_count + vm_cnt.v_cache_count);
-}
-
-u_int
-kmem_page_count(void)
-{
-
-	return (vm_cnt.v_page_count);
-}
-
 uint64_t
 kmem_size(void)
 {
@@ -169,13 +133,6 @@ kmem_size(void)
 	return (kmem_size_val);
 }
 
-uint64_t
-kmem_used(void)
-{
-
-	return (vmem_size(kmem_arena, VMEM_ALLOC));
-}
-
 static int
 kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
 {
Index: sys/cddl/compat/opensolaris/sys/kmem.h
===================================================================
--- sys/cddl/compat/opensolaris/sys/kmem.h	(revision 270875)
+++ sys/cddl/compat/opensolaris/sys/kmem.h	(working copy)
@@ -66,17 +66,6 @@ typedef struct kmem_cache {
 void *zfs_kmem_alloc(size_t size, int kmflags);
 void zfs_kmem_free(void *buf, size_t size);
 uint64_t kmem_size(void);
-uint64_t kmem_used(void);
-u_int kmem_page_count(void);
-
-/*
- * The return values from kmem_free_* are only valid once the pagedaemon
- * has been initialised, before then they return 0.
- */
-u_int kmem_free_count(void);
-u_int kmem_free_target(void);
-u_int kmem_free_min(void);
-
 kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
     int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
     void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
@@ -88,6 +77,9 @@ void kmem_reap(void);
 int kmem_debugging(void);
 void *calloc(size_t n, size_t s);
 
+#define	freemem				(vm_cnt.v_free_count + vm_cnt.v_cache_count)
+#define	minfree				vm_cnt.v_free_min
+#define	heap_arena			kmem_arena
 #define	kmem_alloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags))
 #define	kmem_zalloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags) | M_ZERO)
 #define	kmem_free(buf, size)		zfs_kmem_free((buf), (size))
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	(revision 270875)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	(working copy)
@@ -201,7 +201,7 @@ int zfs_arc_shrink_shift = 0;
 int zfs_arc_p_min_shift = 0;
 int zfs_disable_dup_eviction = 0;
 uint64_t zfs_arc_average_blocksize = 8 * 1024;	/* 8KB */
-u_int zfs_arc_free_target = (1 << 19); /* default before pagedaemon init only */
+u_int zfs_arc_free_target = (1 << 16); /* default before pagedaemon init only */
 
 static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS);
 
@@ -210,7 +210,7 @@ static void
 arc_free_target_init(void *unused __unused)
 {
 
-	zfs_arc_free_target = kmem_free_target();
+	zfs_arc_free_target = vm_pageout_wakeup_thresh;
 }
 SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
     arc_free_target_init, NULL);
@@ -245,9 +245,9 @@ sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS
 	if (err != 0 || req->newptr == NULL)
 		return (err);
 
-	if (val < kmem_free_min())
+	if (val < minfree)
 		return (EINVAL);
-	if (val > kmem_page_count())
+	if (val > vm_cnt.v_page_count)
 		return (EINVAL);
 
 	zfs_arc_free_target = val;
@@ -2503,18 +2503,13 @@ arc_reclaim_needed(void)
 		return (1);
 	}
 
-	if (kmem_free_count() < zfs_arc_free_target) {
-		DTRACE_PROBE2(arc__reclaim_freetarget, uint64_t,
-		    kmem_free_count(), uint64_t, zfs_arc_free_target);
-		return (1);
-	}
-
 	/*
 	 * Cooperate with pagedaemon when it's time for it to scan
 	 * and reclaim some pages.
 	 */
-	if (vm_paging_needed()) {
-		DTRACE_PROBE(arc__reclaim_paging);
+	if (freemem < zfs_arc_free_target) {
+		DTRACE_PROBE2(arc__reclaim_freemem, uint64_t,
+		    freemem, uint64_t, zfs_arc_free_target);
 		return (1);
 	}
 
@@ -2544,8 +2539,19 @@ arc_reclaim_needed(void)
 	if (availrmem < swapfs_minfree + swapfs_reserve + extra)
 		return (1);
 
-#if defined(__i386)
 	/*
+	 * Check that we have enough availrmem that memory locking (e.g., via
+	 * mlock(3C) or memcntl(2)) can still succeed.  (pages_pp_maximum
+	 * stores the number of pages that cannot be locked; when availrmem
+	 * drops below pages_pp_maximum, page locking mechanisms such as
+	 * page_pp_lock() will fail.)
+	 */
+	if (availrmem <= pages_pp_maximum)
+		return (1);
+
+#endif	/* sun */
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
+	/*
 	 * If we're on an i386 platform, it's possible that we'll exhaust the
 	 * kernel heap space before we ever run out of available physical
 	 * memory.  Most checks of the size of the heap_area compare against
@@ -2556,25 +2562,33 @@
 	 * heap is allocated.  (Or, in the calculation, if less than 1/4th is
 	 * free)
 	 */
-	if (btop(vmem_size(heap_arena, VMEM_FREE)) <
-	    (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
-		return (1);
-#endif
-#else	/* sun */
-#ifdef __i386__
-	/* i386 has KVA limits that the raw page counts above don't consider */
-	if (kmem_used() > (kmem_size() * 3) / 4) {
+	if (vmem_size(heap_arena, VMEM_FREE) <
+	    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) {
 		DTRACE_PROBE2(arc__reclaim_used, uint64_t,
-		    kmem_used(), uint64_t, (kmem_size() * 3) / 4);
+		    vmem_size(heap_arena, VMEM_FREE), uint64_t,
+		    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2);
 		return (1);
 	}
 #endif
+#ifdef sun
+	/*
+	 * If zio data pages are being allocated out of a separate heap segment,
+	 * then enforce that the size of available vmem for this arena remains
+	 * above about 1/16th free.
+	 *
+	 * Note: The 1/16th arena free requirement was put in place
+	 * to aggressively evict memory from the arc in order to avoid
+	 * memory fragmentation issues.
+	 */
+	if (zio_arena != NULL &&
+	    vmem_size(zio_arena, VMEM_FREE) <
+	    (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
+		return (1);
 #endif	/* sun */
-
-#else
+#else	/* _KERNEL */
 	if (spa_get_random(100) == 0)
 		return (1);
-#endif
+#endif	/* _KERNEL */
 	DTRACE_PROBE(arc__reclaim_no);
 
 	return (0);
@@ -2625,6 +2639,15 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat)
 	}
 	kmem_cache_reap_now(buf_cache);
 	kmem_cache_reap_now(hdr_cache);
+
+#ifdef sun
+	/*
+	 * Ask the vmem arena to reclaim unused memory from its
+	 * quantum caches.
+	 */
+	if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
+		vmem_qcache_reap(zio_arena);
+#endif
 }
 
 static void
@@ -2774,20 +2797,6 @@ arc_evict_needed(arc_buf_contents_t type)
 	if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
 		return (1);
 
-#ifdef sun
-#ifdef _KERNEL
-	/*
-	 * If zio data pages are being allocated out of a separate heap segment,
-	 * then enforce that the size of available vmem for this area remains
-	 * above about 1/32nd free.
-	 */
-	if (type == ARC_BUFC_DATA && zio_arena != NULL &&
-	    vmem_size(zio_arena, VMEM_FREE) <
-	    (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
-		return (1);
-#endif
-#endif	/* sun */
-
 	if (arc_reclaim_needed())
 		return (1);
 
@@ -3946,20 +3955,16 @@ static int
 arc_memory_throttle(uint64_t reserve, uint64_t txg)
 {
 #ifdef _KERNEL
-	uint64_t available_memory =
-	    ptoa((uintmax_t)vm_cnt.v_free_count + vm_cnt.v_cache_count);
+	uint64_t available_memory = ptob(freemem);
 	static uint64_t page_load = 0;
 	static uint64_t last_txg = 0;
 
-#ifdef sun
-#if defined(__i386)
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
 	available_memory =
-	    MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
+	    MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE)));
 #endif
-#endif	/* sun */
 
-	if (vm_cnt.v_free_count + vm_cnt.v_cache_count >
-	    (uint64_t)physmem * arc_lotsfree_percent / 100)
+	if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100)
 		return (0);
 
 	if (txg > last_txg) {
@@ -3972,7 +3977,7 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg
 	 * continue to let page writes occur as quickly as possible.
 	 */
 	if (curproc == pageproc) {
-		if (page_load > available_memory / 4)
+		if (page_load > MAX(ptob(minfree), available_memory) / 4)
 			return (SET_ERROR(ERESTART));
 		/* Note: reserve is inflated, so we deflate */
 		page_load += reserve / 8;
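
Reviewer note (not part of the patch): a minimal userland sketch of the reclaim policy this change moves the ARC to. Reclaim now triggers when the global freemem page count drops below zfs_arc_free_target (seeded from vm_pageout_wakeup_thresh once the pagedaemon is up), and, on platforms without UMA_MD_SMALL_ALLOC, when less than a quarter of the kernel heap KVA remains free. Every mock_* identifier and constant below is an illustrative stand-in for the kernel symbols, chosen only so the logic can be compiled and read in isolation.

/*
 * Standalone sketch of the two reclaim triggers introduced above.
 * mock_* variables stand in for freemem, zfs_arc_free_target and
 * vmem_size(heap_arena, ...); they are not kernel symbols.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t mock_freemem;		/* free + cache page count */
static uint64_t mock_free_target;	/* zfs_arc_free_target stand-in */
static uint64_t mock_kva_free;		/* vmem_size(heap_arena, VMEM_FREE) */
static uint64_t mock_kva_total;		/* VMEM_FREE | VMEM_ALLOC */

static int
mock_reclaim_needed(void)
{

	/* Trigger 1: free pages below the pagedaemon wakeup threshold. */
	if (mock_freemem < mock_free_target)
		return (1);

	/* Trigger 2: less than 1/4 of kernel heap KVA still free. */
	if (mock_kva_free < (mock_kva_total >> 2))
		return (1);

	return (0);
}

int
main(void)
{

	mock_free_target = 1 << 16;	/* the patch's pre-pagedaemon default */
	mock_kva_total = 1024;

	mock_freemem = 1 << 15;		/* below target: reclaim */
	mock_kva_free = 512;
	printf("low freemem -> %d\n", mock_reclaim_needed());

	mock_freemem = 1 << 20;		/* pages are fine, KVA nearly gone */
	mock_kva_free = 100;
	printf("low KVA -> %d\n", mock_reclaim_needed());

	return (0);
}

The practical effect of the switch is that the VM's own wakeup threshold, rather than the higher v_free_target value previously returned by kmem_free_target(), decides when the ARC backs off, so ARC shrinking and pagedaemon scanning begin at the same point instead of the ARC giving up memory well before any paging pressure exists.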