Lines 193-201
extern int zfs_prefetch_disable;
Link Here
|
193 |
*/ |
193 |
*/ |
194 |
static boolean_t arc_warm; |
194 |
static boolean_t arc_warm; |
195 |
|
195 |
|
196 |
/* |
|
|
197 |
* These tunables are for performance analysis. |
198 |
*/ |
199 |
uint64_t zfs_arc_max; |
196 |
uint64_t zfs_arc_max; |
200 |
uint64_t zfs_arc_min; |
197 |
uint64_t zfs_arc_min; |
201 |
uint64_t zfs_arc_meta_limit = 0; |
198 |
uint64_t zfs_arc_meta_limit = 0; |
Lines 204-210
int zfs_arc_shrink_shift = 0;
Link Here
|
204 |
int zfs_arc_p_min_shift = 0; |
201 |
int zfs_arc_p_min_shift = 0; |
205 |
int zfs_disable_dup_eviction = 0; |
202 |
int zfs_disable_dup_eviction = 0; |
206 |
uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ |
203 |
uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ |
|
|
204 |
u_int zfs_arc_free_target = (1 << 16); /* default before pagedaemon init only */ |
207 |
|
205 |
|
|
|
206 |
static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); |
207 |
|
208 |
#ifdef _KERNEL |
209 |
static void |
210 |
arc_free_target_init(void *unused __unused) |
211 |
{ |
212 |
|
213 |
zfs_arc_free_target = vm_pageout_wakeup_thresh; |
214 |
} |
215 |
SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, |
216 |
arc_free_target_init, NULL); |
217 |
|
208 |
TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); |
218 |
TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); |
209 |
TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); |
219 |
TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); |
210 |
TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); |
220 |
TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); |
Lines 217-223
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_
Link Here
|
217 |
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, |
227 |
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, |
218 |
&zfs_arc_average_blocksize, 0, |
228 |
&zfs_arc_average_blocksize, 0, |
219 |
"ARC average blocksize"); |
229 |
"ARC average blocksize"); |
|
|
230 |
/* |
231 |
* We don't have a tunable for arc_free_target due to the dependency on |
232 |
* pagedaemon initialisation. |
233 |
*/ |
234 |
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, |
235 |
CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int), |
236 |
sysctl_vfs_zfs_arc_free_target, "IU", |
237 |
"Desired number of free pages below which ARC triggers reclaim"); |
220 |
|
238 |
|
|
|
239 |
static int |
240 |
sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS) |
241 |
{ |
242 |
u_int val; |
243 |
int err; |
244 |
|
245 |
val = zfs_arc_free_target; |
246 |
err = sysctl_handle_int(oidp, &val, 0, req); |
247 |
if (err != 0 || req->newptr == NULL) |
248 |
return (err); |
249 |
|
250 |
if (val < minfree) |
251 |
return (EINVAL); |
252 |
if (val > cnt.v_page_count) |
253 |
return (EINVAL); |
254 |
|
255 |
zfs_arc_free_target = val; |
256 |
|
257 |
return (0); |
258 |
} |
259 |
#endif |
260 |
|
221 |
/* |
261 |
/* |
222 |
* Note that buffers can be in one of 6 states: |
262 |
* Note that buffers can be in one of 6 states: |
223 |
* ARC_anon - anonymous (discussed below) |
263 |
* ARC_anon - anonymous (discussed below) |
Lines 2421-2429
arc_flush(spa_t *spa)
Link Here
|
2421 |
void |
2461 |
void |
2422 |
arc_shrink(void) |
2462 |
arc_shrink(void) |
2423 |
{ |
2463 |
{ |
|
|
2464 |
|
2424 |
if (arc_c > arc_c_min) { |
2465 |
if (arc_c > arc_c_min) { |
2425 |
uint64_t to_free; |
2466 |
uint64_t to_free; |
2426 |
|
2467 |
|
|
|
2468 |
DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t, |
2469 |
arc_c_min); |
2427 |
#ifdef _KERNEL |
2470 |
#ifdef _KERNEL |
2428 |
to_free = arc_c >> arc_shrink_shift; |
2471 |
to_free = arc_c >> arc_shrink_shift; |
2429 |
#else |
2472 |
#else |
Lines 2443-2450
arc_shrink(void)
Link Here
|
2443 |
ASSERT((int64_t)arc_p >= 0); |
2486 |
ASSERT((int64_t)arc_p >= 0); |
2444 |
} |
2487 |
} |
2445 |
|
2488 |
|
2446 |
if (arc_size > arc_c) |
2489 |
if (arc_size > arc_c) { |
|
|
2490 |
DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size, |
2491 |
uint64_t, arc_c); |
2447 |
arc_adjust(); |
2492 |
arc_adjust(); |
|
|
2493 |
} |
2448 |
} |
2494 |
} |
2449 |
|
2495 |
|
2450 |
static int needfree = 0; |
2496 |
static int needfree = 0; |
Lines 2455-2469
arc_reclaim_needed(void)
Link Here
|
2455 |
|
2501 |
|
2456 |
#ifdef _KERNEL |
2502 |
#ifdef _KERNEL |
2457 |
|
2503 |
|
2458 |
if (needfree) |
2504 |
if (needfree) { |
|
|
2505 |
DTRACE_PROBE(arc__reclaim_needfree); |
2459 |
return (1); |
2506 |
return (1); |
|
|
2507 |
} |
2460 |
|
2508 |
|
2461 |
/* |
2509 |
/* |
2462 |
* Cooperate with pagedaemon when it's time for it to scan |
2510 |
* Cooperate with pagedaemon when it's time for it to scan |
2463 |
* and reclaim some pages. |
2511 |
* and reclaim some pages. |
2464 |
*/ |
2512 |
*/ |
2465 |
if (vm_paging_needed()) |
2513 |
if (freemem < zfs_arc_free_target) { |
|
|
2514 |
DTRACE_PROBE2(arc__reclaim_freemem, uint64_t, |
2515 |
freemem, uint64_t, zfs_arc_free_target); |
2466 |
return (1); |
2516 |
return (1); |
|
|
2517 |
} |
2467 |
|
2518 |
|
2468 |
#ifdef sun |
2519 |
#ifdef sun |
2469 |
/* |
2520 |
/* |
Lines 2491-2498
arc_reclaim_needed(void)
Link Here
|
2491 |
if (availrmem < swapfs_minfree + swapfs_reserve + extra) |
2542 |
if (availrmem < swapfs_minfree + swapfs_reserve + extra) |
2492 |
return (1); |
2543 |
return (1); |
2493 |
|
2544 |
|
2494 |
#if defined(__i386) |
|
|
2495 |
/* |
2545 |
/* |
|
|
2546 |
* Check that we have enough availrmem that memory locking (e.g., via |
2547 |
* mlock(3C) or memcntl(2)) can still succeed. (pages_pp_maximum |
2548 |
* stores the number of pages that cannot be locked; when availrmem |
2549 |
* drops below pages_pp_maximum, page locking mechanisms such as |
2550 |
* page_pp_lock() will fail.) |
2551 |
*/ |
2552 |
if (availrmem <= pages_pp_maximum) |
2553 |
return (1); |
2554 |
|
2555 |
#endif /* sun */ |
2556 |
#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) |
2557 |
/* |
2496 |
* If we're on an i386 platform, it's possible that we'll exhaust the |
2558 |
* If we're on an i386 platform, it's possible that we'll exhaust the |
2497 |
* kernel heap space before we ever run out of available physical |
2559 |
* kernel heap space before we ever run out of available physical |
2498 |
* memory. Most checks of the size of the heap_area compare against |
2560 |
* memory. Most checks of the size of the heap_area compare against |
Lines 2503-2521
arc_reclaim_needed(void)
Link Here
|
2503 |
* heap is allocated. (Or, in the calculation, if less than 1/4th is |
2565 |
* heap is allocated. (Or, in the calculation, if less than 1/4th is |
2504 |
* free) |
2566 |
* free) |
2505 |
*/ |
2567 |
*/ |
2506 |
if (btop(vmem_size(heap_arena, VMEM_FREE)) < |
2568 |
if (vmem_size(heap_arena, VMEM_FREE) < |
2507 |
(btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2)) |
2569 |
(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) { |
|
|
2570 |
DTRACE_PROBE2(arc__reclaim_used, uint64_t, |
2571 |
vmem_size(heap_arena, VMEM_FREE), uint64_t, |
2572 |
(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2); |
2508 |
return (1); |
2573 |
return (1); |
|
|
2574 |
} |
2509 |
#endif |
2575 |
#endif |
2510 |
#else /* !sun */ |
2576 |
#ifdef sun |
2511 |
if (kmem_used() > (kmem_size() * 3) / 4) |
2577 |
/* |
|
|
2578 |
* If zio data pages are being allocated out of a separate heap segment, |
2579 |
* then enforce that the size of available vmem for this arena remains |
2580 |
* above about 1/16th free. |
2581 |
* |
2582 |
* Note: The 1/16th arena free requirement was put in place |
2583 |
* to aggressively evict memory from the arc in order to avoid |
2584 |
* memory fragmentation issues. |
2585 |
*/ |
2586 |
if (zio_arena != NULL && |
2587 |
vmem_size(zio_arena, VMEM_FREE) < |
2588 |
(vmem_size(zio_arena, VMEM_ALLOC) >> 4)) |
2512 |
return (1); |
2589 |
return (1); |
2513 |
#endif /* sun */ |
2590 |
#endif /* sun */ |
2514 |
|
2591 |
#else /* _KERNEL */ |
2515 |
#else |
|
|
2516 |
if (spa_get_random(100) == 0) |
2592 |
if (spa_get_random(100) == 0) |
2517 |
return (1); |
2593 |
return (1); |
2518 |
#endif |
2594 |
#endif /* _KERNEL */ |
|
|
2595 |
DTRACE_PROBE(arc__reclaim_no); |
2596 |
|
2519 |
return (0); |
2597 |
return (0); |
2520 |
} |
2598 |
} |
2521 |
|
2599 |
|
Lines 2564-2569
arc_kmem_reap_now(arc_reclaim_strategy_t strat)
Link Here
|
2564 |
} |
2642 |
} |
2565 |
kmem_cache_reap_now(buf_cache); |
2643 |
kmem_cache_reap_now(buf_cache); |
2566 |
kmem_cache_reap_now(hdr_cache); |
2644 |
kmem_cache_reap_now(hdr_cache); |
|
|
2645 |
|
2646 |
#ifdef sun |
2647 |
/* |
2648 |
* Ask the vmem arena to reclaim unused memory from its |
2649 |
* quantum caches. |
2650 |
*/ |
2651 |
if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR) |
2652 |
vmem_qcache_reap(zio_arena); |
2653 |
#endif |
2567 |
} |
2654 |
} |
2568 |
|
2655 |
|
2569 |
static void |
2656 |
static void |
Lines 2713-2732
arc_evict_needed(arc_buf_contents_t type)
Link Here
|
2713 |
if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit) |
2800 |
if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit) |
2714 |
return (1); |
2801 |
return (1); |
2715 |
|
2802 |
|
2716 |
#ifdef sun |
|
|
2717 |
#ifdef _KERNEL |
2718 |
/* |
2719 |
* If zio data pages are being allocated out of a separate heap segment, |
2720 |
* then enforce that the size of available vmem for this area remains |
2721 |
* above about 1/32nd free. |
2722 |
*/ |
2723 |
if (type == ARC_BUFC_DATA && zio_arena != NULL && |
2724 |
vmem_size(zio_arena, VMEM_FREE) < |
2725 |
(vmem_size(zio_arena, VMEM_ALLOC) >> 5)) |
2726 |
return (1); |
2727 |
#endif |
2728 |
#endif /* sun */ |
2729 |
|
2730 |
if (arc_reclaim_needed()) |
2803 |
if (arc_reclaim_needed()) |
2731 |
return (1); |
2804 |
return (1); |
2732 |
|
2805 |
|
Lines 3885-3904
static int
Link Here
|
3885 |
arc_memory_throttle(uint64_t reserve, uint64_t txg) |
3958 |
arc_memory_throttle(uint64_t reserve, uint64_t txg) |
3886 |
{ |
3959 |
{ |
3887 |
#ifdef _KERNEL |
3960 |
#ifdef _KERNEL |
3888 |
uint64_t available_memory = |
3961 |
uint64_t available_memory = ptob(freemem); |
3889 |
ptoa((uintmax_t)cnt.v_free_count + cnt.v_cache_count); |
|
|
3890 |
static uint64_t page_load = 0; |
3962 |
static uint64_t page_load = 0; |
3891 |
static uint64_t last_txg = 0; |
3963 |
static uint64_t last_txg = 0; |
3892 |
|
3964 |
|
3893 |
#ifdef sun |
3965 |
#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) |
3894 |
#if defined(__i386) |
|
|
3895 |
available_memory = |
3966 |
available_memory = |
3896 |
MIN(available_memory, vmem_size(heap_arena, VMEM_FREE)); |
3967 |
MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE))); |
3897 |
#endif |
3968 |
#endif |
3898 |
#endif /* sun */ |
|
|
3899 |
|
3969 |
|
3900 |
if (cnt.v_free_count + cnt.v_cache_count > |
3970 |
if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100) |
3901 |
(uint64_t)physmem * arc_lotsfree_percent / 100) |
|
|
3902 |
return (0); |
3971 |
return (0); |
3903 |
|
3972 |
|
3904 |
if (txg > last_txg) { |
3973 |
if (txg > last_txg) { |
Lines 3911-3917
arc_memory_throttle(uint64_t reserve, uint64_t txg
Link Here
|
3911 |
* continue to let page writes occur as quickly as possible. |
3980 |
* continue to let page writes occur as quickly as possible. |
3912 |
*/ |
3981 |
*/ |
3913 |
if (curproc == pageproc) { |
3982 |
if (curproc == pageproc) { |
3914 |
if (page_load > available_memory / 4) |
3983 |
if (page_load > MAX(ptob(minfree), available_memory) / 4) |
3915 |
return (SET_ERROR(ERESTART)); |
3984 |
return (SET_ERROR(ERESTART)); |
3916 |
/* Note: reserve is inflated, so we deflate */ |
3985 |
/* Note: reserve is inflated, so we deflate */ |
3917 |
page_load += reserve / 8; |
3986 |
page_load += reserve / 8; |