Lines 193-201 (context: extern int zfs_prefetch_disable;)

  */
  static boolean_t arc_warm;
 
- /*
-  * These tunables are for performance analysis.
-  */
  uint64_t zfs_arc_max;
  uint64_t zfs_arc_min;
  uint64_t zfs_arc_meta_limit = 0;
Lines 204-210 (context: int zfs_arc_shrink_shift = 0;)

  int zfs_arc_p_min_shift = 0;
  int zfs_disable_dup_eviction = 0;
  uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
+ u_int zfs_arc_free_target = (1 << 16); /* default before pagedaemon init only */
+ int zfs_arc_reclaim_cache_free = 1;
 
+ static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS);
+
+ #ifdef _KERNEL
+ static void
+ arc_free_target_init(void *unused __unused)
+ {
+
+         zfs_arc_free_target = vm_pageout_wakeup_thresh;
+ }
+ SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
+     arc_free_target_init, NULL);
+
  TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
  TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
  TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
Lines 217-223 (context: SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_ …)

  SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN,
      &zfs_arc_average_blocksize, 0,
      "ARC average blocksize");
+ SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_reclaim_cache_free, CTLFLAG_RWTUN,
+     &zfs_arc_reclaim_cache_free, 0,
+     "ARC treats cached pages as free");
+ /*
+  * We don't have a tunable for arc_free_target due to the dependency on
+  * pagedaemon initialisation.
+  */
+ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
+     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int),
+     sysctl_vfs_zfs_arc_free_target, "IU",
+     "Desired number of free pages below which ARC triggers reclaim");
 
+ static int
+ sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS)
+ {
+         u_int val;
+         int err;
+
+         val = zfs_arc_free_target;
+         err = sysctl_handle_int(oidp, &val, 0, req);
+         if (err != 0 || req->newptr == NULL)
+                 return (err);
+
+         if (val < minfree)
+                 return (EINVAL);
+         if (val > cnt.v_page_count)
+                 return (EINVAL);
+
+         zfs_arc_free_target = val;
+
+         return (0);
+ }
+ #endif
+
  /*
   * Note that buffers can be in one of 6 states:
   * ARC_anon - anonymous (discussed below)
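A quick way to exercise the handler from userland on a kernel carrying this patch is sysctlbyname(3); values below minfree or above the machine's page count come back EINVAL, per the checks above. A hedged sketch (the bump size is arbitrary, and the write requires root):

#include <sys/types.h>
#include <sys/sysctl.h>

#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
        u_int cur, want;
        size_t len = sizeof(cur);

        /* Read the current target (pages), as published by SYSCTL_PROC. */
        if (sysctlbyname("vfs.zfs.arc_free_target", &cur, &len,
            NULL, 0) == -1) {
                perror("read vfs.zfs.arc_free_target");
                return (1);
        }
        printf("arc_free_target = %u pages\n", cur);

        want = cur + 1024;      /* arbitrary bump, for the demo only */
        if (sysctlbyname("vfs.zfs.arc_free_target", NULL, NULL,
            &want, sizeof(want)) == -1)
                fprintf(stderr, "set to %u failed: %s\n", want,
                    strerror(errno));
        return (0);
}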
Lines 2421-2429 (context: arc_flush(spa_t *spa))

  void
  arc_shrink(void)
  {
+
          if (arc_c > arc_c_min) {
                  uint64_t to_free;
 
+                 DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t,
+                     arc_c_min);
  #ifdef _KERNEL
                  to_free = arc_c >> arc_shrink_shift;
  #else
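DTRACE_PROBE2() here is the illumos-style static probe macro; on FreeBSD such probes surface through the sdt provider (the double underscore becomes a dash, so this one should show up as something like sdt:::arc-shrink). For reference, a hypothetical stand-alone probe definition using what I believe are the FreeBSD 10-era <sys/sdt.h> macro signatures — the example provider and names are invented, and this is kernel-module code, not a standalone program:

#include <sys/param.h>
#include <sys/sdt.h>

/* Hypothetical provider/probe; the real ZFS probes ride the sdt provider. */
SDT_PROVIDER_DEFINE(example);
SDT_PROBE_DEFINE2(example, , arc, shrink, "uint64_t", "uint64_t");

static void
example_note_shrink(uint64_t c, uint64_t c_min)
{
        /* Fires the probe with both values attached as arguments. */
        SDT_PROBE2(example, , arc, shrink, c, c_min);
}

Once such a module is loaded, dtrace -l -P example should list the probe.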
Lines 2443-2450 (context: arc_shrink(void))

                  ASSERT((int64_t)arc_p >= 0);
          }
 
-         if (arc_size > arc_c)
+         if (arc_size > arc_c) {
+                 DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size,
+                     uint64_t, arc_c);
                  arc_adjust();
+         }
  }
 
  static int needfree = 0;
Lines 2452-2469 (context: static int needfree = 0;)

  static int
  arc_reclaim_needed(void)
  {
+         u_int fm;
 
  #ifdef _KERNEL
 
-         if (needfree)
+         if (needfree) {
+                 DTRACE_PROBE(arc__reclaim_needfree);
                  return (1);
+         }
 
          /*
           * Cooperate with pagedaemon when it's time for it to scan
           * and reclaim some pages.
           */
-         if (vm_paging_needed())
+         if (zfs_arc_reclaim_cache_free == 0)
+                 fm = cnt.v_free_count;
+         else
+                 fm = freemem;
+
+         if (fm < zfs_arc_free_target) {
+                 DTRACE_PROBE3(arc__reclaim_freemem, uint64_t,
+                     fm, uint64_t, zfs_arc_free_target,
+                     int, zfs_arc_reclaim_cache_free);
                  return (1);
+         }
 
  #ifdef sun
          /*
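In effect this hunk replaces the binary vm_paging_needed() test with a tunable comparison: pick a free-memory measure — cnt.v_free_count for strictly free pages, or freemem, which (judging from the tunable's name and description) the compat layer maps to free-plus-cached pages — and reclaim once it drops below arc_free_target. A self-contained userland mock with made-up page counts, just to show how the knob changes the decision:

#include <stdio.h>

static int
reclaim_needed(unsigned v_free_count, unsigned freemem,
    unsigned free_target, int cache_free)
{
        unsigned fm;

        /* freemem counts free + cached pages; v_free_count free only. */
        fm = cache_free ? freemem : v_free_count;
        return (fm < free_target);
}

int
main(void)
{
        /* 20000 pages truly free, 50000 counting cache, target 32768. */
        printf("strict (cache_free=0): %d\n",
            reclaim_needed(20000, 50000, 32768, 0));
        printf("default (cache_free=1): %d\n",
            reclaim_needed(20000, 50000, 32768, 1));
        return (0);
}

With the default setting the ARC sits tight (cached pages count as reusable); with the strict setting the same state triggers reclaim.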
Lines 2491-2498 (context: arc_reclaim_needed(void))

          if (availrmem < swapfs_minfree + swapfs_reserve + extra)
                  return (1);
 
- #if defined(__i386)
          /*
+          * Check that we have enough availrmem that memory locking (e.g., via
+          * mlock(3C) or memcntl(2)) can still succeed.  (pages_pp_maximum
+          * stores the number of pages that cannot be locked; when availrmem
+          * drops below pages_pp_maximum, page locking mechanisms such as
+          * page_pp_lock() will fail.)
+          */
+         if (availrmem <= pages_pp_maximum)
+                 return (1);
+
+ #endif  /* sun */
+ #if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
+         /*
           * If we're on an i386 platform, it's possible that we'll exhaust the
           * kernel heap space before we ever run out of available physical
           * memory.  Most checks of the size of the heap_area compare against
Lines 2503-2521 (context: arc_reclaim_needed(void))

           * heap is allocated.  (Or, in the calculation, if less than 1/4th is
           * free)
           */
-         if (btop(vmem_size(heap_arena, VMEM_FREE)) <
-             (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
+         if (vmem_size(heap_arena, VMEM_FREE) <
+             (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) {
+                 DTRACE_PROBE2(arc__reclaim_used, uint64_t,
+                     vmem_size(heap_arena, VMEM_FREE), uint64_t,
+                     (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2);
                  return (1);
+         }
  #endif
- #else   /* !sun */
-         if (kmem_used() > (kmem_size() * 3) / 4)
+ #ifdef sun
+         /*
+          * If zio data pages are being allocated out of a separate heap segment,
+          * then enforce that the size of available vmem for this arena remains
+          * above about 1/16th free.
+          *
+          * Note: The 1/16th arena free requirement was put in place
+          * to aggressively evict memory from the arc in order to avoid
+          * memory fragmentation issues.
+          */
+         if (zio_arena != NULL &&
+             vmem_size(zio_arena, VMEM_FREE) <
+             (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
                  return (1);
  #endif  /* sun */
-
- #else
+ #else   /* _KERNEL */
          if (spa_get_random(100) == 0)
                  return (1);
- #endif
+ #endif  /* _KERNEL */
+         DTRACE_PROBE(arc__reclaim_no);
+
          return (0);
  }
 
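Dropping btop() from both sides of the heap_arena test is behaviour-preserving, and marginally more precise: btop() divides both operands by the page size, which cannot change the direction of the comparison, only add truncation. A throwaway demo with made-up arena sizes (4 KB pages assumed for the demo):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12                      /* 4 KB pages, demo only */
#define BTOP(x)         ((x) >> PAGE_SHIFT)     /* bytes -> pages */

int
main(void)
{
        uint64_t vmem_free = 100ULL << 20;      /* 100 MB free in the arena */
        uint64_t vmem_total = 512ULL << 20;     /* 512 MB total */

        /* Old form, in pages, and new form, in bytes: same verdict. */
        printf("pages: %d\n", BTOP(vmem_free) < (BTOP(vmem_total) >> 2));
        printf("bytes: %d\n", vmem_free < (vmem_total >> 2));
        return (0);
}

Both lines print 1 here (100 MB free is below the 128 MB quarter-mark), and the new DTRACE_PROBE2 reports exactly the two byte quantities being compared.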
Lines 2529-2534 (context: arc_kmem_reap_now(arc_reclaim_strategy_t strat))

          kmem_cache_t *prev_cache = NULL;
          kmem_cache_t *prev_data_cache = NULL;
 
+         DTRACE_PROBE(arc__kmem_reap_start);
  #ifdef _KERNEL
          if (arc_meta_used >= arc_meta_limit) {
                  /*
Lines 2564-2569 (context: arc_kmem_reap_now(arc_reclaim_strategy_t strat))

          }
          kmem_cache_reap_now(buf_cache);
          kmem_cache_reap_now(hdr_cache);
+
+ #ifdef sun
+         /*
+          * Ask the vmem arena to reclaim unused memory from its
+          * quantum caches.
+          */
+         if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
+                 vmem_qcache_reap(zio_arena);
+ #endif
+         DTRACE_PROBE(arc__kmem_reap_end);
  }
 
  static void
Lines 2713-2732 (context: arc_evict_needed(arc_buf_contents_t type))

          if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
                  return (1);
 
- #ifdef sun
- #ifdef _KERNEL
-         /*
-          * If zio data pages are being allocated out of a separate heap segment,
-          * then enforce that the size of available vmem for this area remains
-          * above about 1/32nd free.
-          */
-         if (type == ARC_BUFC_DATA && zio_arena != NULL &&
-             vmem_size(zio_arena, VMEM_FREE) <
-             (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
-                 return (1);
- #endif
- #endif  /* sun */
-
          if (arc_reclaim_needed())
                  return (1);
 
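Note that the 1/32nd zio_arena test deleted here is not simply lost: the hunk at Lines 2503-2521 adds a stricter 1/16th variant to arc_reclaim_needed() under #ifdef sun, and arc_evict_needed() still reaches that logic through the arc_reclaim_needed() call kept just below, so the check is centralised rather than duplicated.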
Lines 3885-3904 (context: static int)

  arc_memory_throttle(uint64_t reserve, uint64_t txg)
  {
  #ifdef _KERNEL
-         uint64_t available_memory =
-             ptoa((uintmax_t)cnt.v_free_count + cnt.v_cache_count);
+         uint64_t available_memory = ptob(freemem);
          static uint64_t page_load = 0;
          static uint64_t last_txg = 0;
 
- #ifdef sun
- #if defined(__i386)
+ #if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
          available_memory =
-             MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
+             MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE)));
  #endif
- #endif  /* sun */
 
-         if (cnt.v_free_count + cnt.v_cache_count >
-             (uint64_t)physmem * arc_lotsfree_percent / 100)
+         if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100)
                  return (0);
 
          if (txg > last_txg) {
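The gate itself is plain integer arithmetic: no throttling at all while free memory exceeds arc_lotsfree_percent of physical memory. A worked example on a hypothetical 4 GB machine with 4 KB pages, arc_lotsfree_percent assumed at its usual default of 10:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t physmem = (4ULL << 30) >> 12;  /* pages: 1048576 */
        uint64_t lotsfree_pct = 10;
        uint64_t gate = physmem * lotsfree_pct / 100;

        /* 104857 pages, i.e. roughly 409 MB on this hypothetical box. */
        printf("throttle engages below %ju free pages (~%ju MB)\n",
            (uintmax_t)gate, (uintmax_t)((gate << 12) >> 20));
        return (0);
}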
Lines 3911-3917 (context: arc_memory_throttle(uint64_t reserve, uint64_t txg …)

           * continue to let page writes occur as quickly as possible.
           */
          if (curproc == pageproc) {
-                 if (page_load > available_memory / 4)
+                 if (page_load > MAX(ptob(minfree), available_memory) / 4)
                          return (SET_ERROR(ERESTART));
                  /* Note: reserve is inflated, so we deflate */
                  page_load += reserve / 8;
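The MAX(ptob(minfree), …) floor looks deliberate: when available_memory collapses toward zero, available_memory / 4 alone would throttle the pagedaemon (curproc == pageproc) at exactly the moment its writes are the way out of the shortage; clamping the budget at no less than ptob(minfree) / 4 appears intended to keep pageout progressing.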