Index: cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =================================================================== --- cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 304971) +++ cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (working copy) @@ -237,7 +237,17 @@ int zfs_arc_p_min_shift = 0; int zfs_disable_dup_eviction = 0; uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ u_int zfs_arc_free_target = 0; +u_int zfs_arc_wakeup_pager = 0; +u_int zfs_arc_wakeup_delay = 500; +int zfs_arc_last_slab = 0; +#define WAKE_PAGER +#ifdef WAKE_PAGER +#define WAKE_PAGER_CONSTANT 10 / 9 /* Pager wakeup threshold */ +static int arc_init_done = 0; /* After arc_warm is valid */ +extern void pagedaemon_wakeup(void); +#endif + /* Absolute min for arc min / max is 16MB. */ static uint64_t arc_abs_min = 16 << 20; @@ -251,7 +261,8 @@ static void arc_free_target_init(void *unused __unused) { - zfs_arc_free_target = vm_pageout_wakeup_thresh; + zfs_arc_free_target = vm_pageout_wakeup_thresh + (vm_pageout_wakeup_thresh / 20); + zfs_arc_wakeup_pager = vm_pageout_wakeup_thresh * WAKE_PAGER_CONSTANT; } SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, arc_free_target_init, NULL); @@ -3475,7 +3486,15 @@ int64_t arc_pages_pp_reserve = 64; */ int64_t arc_swapfs_reserve = 64; +#ifdef WAKE_PAGER /* + * Declare file-local static for event processor bypass and forward functions + */ +static unsigned int arc_no_wake_event = 0; +static void arc_kmem_reap_now(int); +#endif + +/* * Return the amount of memory that can be consumed before reclaim will be * needed. Positive if there is sufficient free memory, negative indicates * the amount of memory that needs to be freed up. 
@@ -3488,6 +3507,11 @@ arc_available_memory(void) free_memory_reason_t r = FMR_UNKNOWN; #ifdef _KERNEL +#ifdef WAKE_PAGER + sbintime_t now; + static sbintime_t last_pagedaemon_wake = 0; +#endif /* WAKE_PAGER */ + if (needfree > 0) { n = PAGESIZE * (-needfree); if (n < lowest) { @@ -3495,6 +3519,37 @@ arc_available_memory(void) r = FMR_NEEDFREE; } } +#ifdef WAKE_PAGER +/* + * When arc is initialized, perform the following: + * + * 1. If we are in the "memory is low enough to wake the pager" zone, + * reap the kernel UMA caches once per wakeup_delay period (500ms default) + * AND wake the pager up (so it can demote pages from inactive to cache to + * ultimately the free list.) + * + * 2. If we're below VM's free_target in free RAM reap *one* UMA zone per + * time period (500ms). + * + */ + if (arc_init_done) { + now = getsbinuptime(); + if ((now - last_pagedaemon_wake) / SBT_1MS > zfs_arc_wakeup_delay) { + last_pagedaemon_wake = now; + arc_no_wake_event++; /* Set bypass flag for ARC */ + if ( ( ((int64_t) freemem - zfs_arc_wakeup_pager) < 0) && (arc_warm == B_TRUE) ) { + arc_kmem_reap_now(0); /* Reap caches if we're close */ + DTRACE_PROBE(arc__wake_pagedaemon); + (void) pagedaemon_wakeup(); /* Wake the pager */ + } else { + if ( ((int64_t) freemem - vm_cnt.v_free_target) < 0) { + arc_kmem_reap_now(1); /* Reap one cache if lots of memory */ + DTRACE_PROBE2(arc__reap_one, int, zfs_arc_last_slab, int, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); + } + } + } + } +#endif /* WAKE_PAGER */ /* * Cooperate with pagedaemon when it's time for it to scan @@ -3633,12 +3688,22 @@ extern kmem_cache_t *zio_buf_cache[]; extern kmem_cache_t *zio_data_buf_cache[]; extern kmem_cache_t *range_seg_cache; +/* + * Pass a flag to this routine; if zero, then reap all. If not then reap + * one slab on a rotating basis. This allows a low-rate call to be used + * on a routine, maintenance basis even when not terribly low on RAM so + * we don't have huge amounts of RAM out in unused UMA allocations. 
+ */ static __noinline void -arc_kmem_reap_now(void) +arc_kmem_reap_now(flag) +int flag; { size_t i; kmem_cache_t *prev_cache = NULL; kmem_cache_t *prev_data_cache = NULL; + int arc_cache_reaped = 0; + int arc_data_cache_reaped = 0; + int reset_last_slab = 0; DTRACE_PROBE(arc__kmem_reap_start); #ifdef _KERNEL @@ -3660,13 +3725,28 @@ static __noinline void for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) { if (zio_buf_cache[i] != prev_cache) { prev_cache = zio_buf_cache[i]; - kmem_cache_reap_now(zio_buf_cache[i]); + if ((!flag) || ((i > zfs_arc_last_slab) && (!arc_cache_reaped))) { + kmem_cache_reap_now(zio_buf_cache[i]); + arc_cache_reaped++; + } } if (zio_data_buf_cache[i] != prev_data_cache) { prev_data_cache = zio_data_buf_cache[i]; - kmem_cache_reap_now(zio_data_buf_cache[i]); + if ((!flag) || ((i > zfs_arc_last_slab) && (!arc_data_cache_reaped))) { + kmem_cache_reap_now(zio_data_buf_cache[i]); + arc_data_cache_reaped++; + } } + if (flag && (!reset_last_slab) && (arc_cache_reaped || arc_data_cache_reaped)) { + reset_last_slab = i; + } } + if (reset_last_slab) { + zfs_arc_last_slab = reset_last_slab; + } + if ((!arc_cache_reaped) && (!arc_data_cache_reaped) && (flag)) { /* Found nothing to reap on one-pass */ + zfs_arc_last_slab = 0; /* Reset */ + } kmem_cache_reap_now(buf_cache); kmem_cache_reap_now(hdr_full_cache); kmem_cache_reap_now(hdr_l2only_cache); @@ -3726,7 +3806,7 @@ arc_reclaim_thread(void *dummy __unused) */ growtime = gethrtime() + SEC2NSEC(arc_grow_retry); - arc_kmem_reap_now(); + arc_kmem_reap_now(0); /* * If we are still low on memory, shrink the ARC @@ -5431,6 +5511,10 @@ static eventhandler_tag arc_event_lowmem = NULL; static void arc_lowmem(void *arg __unused, int howto __unused) { + if (arc_no_wake_event) { /* Don't do it if we woke the pager */ + arc_no_wake_event = 0; /* Just clear the flag */ + return; + } mutex_enter(&arc_reclaim_lock); /* XXX: Memory deficit should be passed as argument. 
*/ @@ -5696,6 +5780,9 @@ arc_init(void) printf(" in /boot/loader.conf.\n"); } #endif +#ifdef WAKE_PAGER + arc_init_done++; +#endif /* WAKE_PAGER */ } void