Patch for bug 187594

sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c (-7 lines)
Lines 133-145 kmem_size(void)
 	return (kmem_size_val);
 }
 
-uint64_t
-kmem_used(void)
-{
-
-	return (vmem_size(kmem_arena, VMEM_ALLOC));
-}
-
 static int
 kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
 {
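Note: kmem_used() returned vmem_size(kmem_arena, VMEM_ALLOC), the number of bytes currently allocated from the kernel arena. Once ZFS buffers are served by UMA that figure no longer tracks ARC pressure, which is why the function (and its one caller in arc_reclaim_needed(), changed below) can go. A minimal userland sketch of the retired three-quarters-full heuristic, assuming the FreeBSD 10-era sysctls vm.kmem_map_size and vm.kmem_size expose the same two quantities:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t used, total;
	size_t len;

	len = sizeof(used);		/* counterpart of kmem_used() */
	if (sysctlbyname("vm.kmem_map_size", &used, &len, NULL, 0) != 0) {
		perror("vm.kmem_map_size");
		return (1);
	}
	len = sizeof(total);		/* counterpart of kmem_size() */
	if (sysctlbyname("vm.kmem_size", &total, &len, NULL, 0) != 0) {
		perror("vm.kmem_size");
		return (1);
	}
	/* The retired check: reclaim once 3/4 of the arena is allocated. */
	printf("kmem: %ju of %ju bytes used -> %s\n", (uintmax_t)used,
	    (uintmax_t)total, used > (total / 4) * 3 ? "reclaim" : "ok");
	return (0);
}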
sys/cddl/compat/opensolaris/sys/kmem.h (-1 / +3 lines)
Lines 66-72 typedef struct kmem_cache {
 void *zfs_kmem_alloc(size_t size, int kmflags);
 void zfs_kmem_free(void *buf, size_t size);
 uint64_t kmem_size(void);
-uint64_t kmem_used(void);
 kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
     int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
     void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
Lines 78-83 void kmem_reap(void);
 int kmem_debugging(void);
 void *calloc(size_t n, size_t s);
 
+#define	freemem				(cnt.v_free_count + cnt.v_cache_count)
+#define	minfree				cnt.v_free_min
+#define	heap_arena			kmem_arena
 #define	kmem_alloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags))
 #define	kmem_zalloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags) | M_ZERO)
 #define	kmem_free(buf, size)		zfs_kmem_free((buf), (size))
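Note: these three defines are the core of the compat shim: the illumos names freemem, minfree and heap_arena now resolve to FreeBSD's free-plus-cache page count, the pagedaemon's v_free_min floor, and kmem_arena respectively, so the illumos-derived ARC code below can keep testing them unchanged. A hedged userland equivalent of the two counters, assuming the FreeBSD 10-era sysctl names (vm.stats.vm.v_cache_count was removed in later releases):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

static u_int
vm_counter(const char *name)
{
	u_int val;
	size_t len = sizeof(val);

	if (sysctlbyname(name, &val, &len, NULL, 0) != 0)
		val = 0;	/* counter not present on this kernel */
	return (val);
}

int
main(void)
{
	/* freemem as defined above: v_free_count + v_cache_count, in pages. */
	u_int freemem = vm_counter("vm.stats.vm.v_free_count") +
	    vm_counter("vm.stats.vm.v_cache_count");
	/* minfree as defined above: v_free_min, in pages. */
	u_int minfree = vm_counter("vm.v_free_min");

	printf("freemem=%u pages, minfree=%u pages\n", freemem, minfree);
	return (0);
}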
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (-37 / +106 lines)
Lines 193-201 extern int zfs_prefetch_disable;
  */
 static boolean_t arc_warm;
 
-/*
- * These tunables are for performance analysis.
- */
 uint64_t zfs_arc_max;
 uint64_t zfs_arc_min;
 uint64_t zfs_arc_meta_limit = 0;
Lines 204-210 int zfs_arc_shrink_shift = 0;
 int zfs_arc_p_min_shift = 0;
 int zfs_disable_dup_eviction = 0;
 uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
+u_int zfs_arc_free_target = (1 << 16); /* default before pagedaemon init only */
 
+static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS);
+
+#ifdef _KERNEL
+static void
+arc_free_target_init(void *unused __unused)
+{
+
+	zfs_arc_free_target = vm_pageout_wakeup_thresh;
+}
+SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
+    arc_free_target_init, NULL);
+
 TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
 TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
 TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
Lines 217-223 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_
 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN,
     &zfs_arc_average_blocksize, 0,
     "ARC average blocksize");
+/*
+ * We don't have a tunable for arc_free_target due to the dependency on
+ * pagedaemon initialisation.
+ */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
+    CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int),
+    sysctl_vfs_zfs_arc_free_target, "IU",
+    "Desired number of free pages below which ARC triggers reclaim");
 
+static int
+sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS)
+{
+	u_int val;
+	int err;
+
+	val = zfs_arc_free_target;
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val < minfree)
+		return (EINVAL);
+	if (val > cnt.v_page_count)
+		return (EINVAL);
+
+	zfs_arc_free_target = val;
+
+	return (0);
+}
+#endif
+
 /*
  * Note that buffers can be in one of 6 states:
  *	ARC_anon	- anonymous (discussed below)
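Note: the handler clamps writes to a sane window: anything below minfree (the pagedaemon's own emergency floor) or above the machine's total page count is rejected with EINVAL. A sketch exercising the clamp from userland, again assuming a kernel with this patch applied:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <errno.h>
#include <stdio.h>

int
main(void)
{
	u_int target, v_free_min, bogus;
	size_t len;

	len = sizeof(target);
	if (sysctlbyname("vfs.zfs.arc_free_target", &target, &len,
	    NULL, 0) != 0) {
		perror("vfs.zfs.arc_free_target");
		return (1);
	}
	len = sizeof(v_free_min);
	if (sysctlbyname("vm.v_free_min", &v_free_min, &len, NULL, 0) != 0) {
		perror("vm.v_free_min");
		return (1);
	}
	printf("target=%u pages, minfree=%u pages\n", target, v_free_min);

	/* A value below minfree must bounce off the handler's first check. */
	bogus = v_free_min > 0 ? v_free_min - 1 : 0;
	if (sysctlbyname("vfs.zfs.arc_free_target", NULL, NULL,
	    &bogus, sizeof(bogus)) == -1 && errno == EINVAL)
		printf("write of %u rejected with EINVAL, as expected\n",
		    bogus);
	return (0);
}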
Lines 2421-2429 arc_flush(spa_t *spa)
 void
 arc_shrink(void)
 {
+
 	if (arc_c > arc_c_min) {
 		uint64_t to_free;
 
+		DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t,
+			arc_c_min);
 #ifdef _KERNEL
 		to_free = arc_c >> arc_shrink_shift;
 #else
Lines 2443-2450 arc_shrink(void)
 		ASSERT((int64_t)arc_p >= 0);
 	}
 
-	if (arc_size > arc_c)
+	if (arc_size > arc_c) {
+		DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size,
+			uint64_t, arc_c);
 		arc_adjust();
+	}
 }
 
 static int needfree = 0;
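Note: the two probes bracket the shrink path: arc__shrink fires whenever the target size arc_c can still be lowered, and arc__shrink_adjust fires when the cache remains above the new target and arc_adjust() has to evict. (Under FreeBSD's SDT translation a double underscore in a probe name should appear to dtrace(1) as a hyphen, i.e. arc-shrink and arc-shrink-adjust; treat that naming as an assumption.) The arithmetic being instrumented, as a standalone model with an illustrative shift of 5 (trim 1/32 per step):

#include <stdint.h>
#include <stdio.h>

/*
 * Model (not kernel code) of the shrink step: lower the target arc_c by
 * arc_c >> shift, clamped so it never drops below arc_c_min.
 */
static uint64_t
model_shrink(uint64_t arc_c, uint64_t arc_c_min, int shift)
{
	if (arc_c > arc_c_min) {
		uint64_t to_free = arc_c >> shift;

		arc_c = arc_c - to_free > arc_c_min ?
		    arc_c - to_free : arc_c_min;
	}
	return (arc_c);
}

int
main(void)
{
	/* 8 GiB target with a 1 GiB floor: one step trims 256 MiB. */
	printf("new arc_c = %ju bytes\n",
	    (uintmax_t)model_shrink(8ULL << 30, 1ULL << 30, 5));
	return (0);
}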
Lines 2455-2469 arc_reclaim_needed(void)
 
 #ifdef _KERNEL
 
-	if (needfree)
+	if (needfree) {
+		DTRACE_PROBE(arc__reclaim_needfree);
 		return (1);
+	}
 
 	/*
 	 * Cooperate with pagedaemon when it's time for it to scan
 	 * and reclaim some pages.
 	 */
-	if (vm_paging_needed())
+	if (freemem < zfs_arc_free_target) {
+		DTRACE_PROBE2(arc__reclaim_freemem, uint64_t,
+		    freemem, uint64_t, zfs_arc_free_target);
 		return (1);
+	}
 
 #ifdef sun
 	/*
Lines 2491-2498 arc_reclaim_needed(void)
 	if (availrmem < swapfs_minfree + swapfs_reserve + extra)
 		return (1);
 
-#if defined(__i386)
 	/*
+	 * Check that we have enough availrmem that memory locking (e.g., via
+	 * mlock(3C) or memcntl(2)) can still succeed.  (pages_pp_maximum
+	 * stores the number of pages that cannot be locked; when availrmem
+	 * drops below pages_pp_maximum, page locking mechanisms such as
+	 * page_pp_lock() will fail.)
+	 */
+	if (availrmem <= pages_pp_maximum)
+		return (1);
+
+#endif	/* sun */
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
+	/*
 	 * If we're on an i386 platform, it's possible that we'll exhaust the
 	 * kernel heap space before we ever run out of available physical
 	 * memory.  Most checks of the size of the heap_area compare against
Lines 2503-2521 arc_reclaim_needed(void)
 	 * heap is allocated.  (Or, in the calculation, if less than 1/4th is
 	 * free)
 	 */
-	if (btop(vmem_size(heap_arena, VMEM_FREE)) <
-	    (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
+	if (vmem_size(heap_arena, VMEM_FREE) <
+	    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) {
+		DTRACE_PROBE2(arc__reclaim_used, uint64_t,
+		    vmem_size(heap_arena, VMEM_FREE), uint64_t,
+		    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2);
 		return (1);
+	}
 #endif
-#else	/* !sun */
-	if (kmem_used() > (kmem_size() * 3) / 4)
+#ifdef sun
+	/*
+	 * If zio data pages are being allocated out of a separate heap segment,
+	 * then enforce that the size of available vmem for this arena remains
+	 * above about 1/16th free.
+	 *
+	 * Note: The 1/16th arena free requirement was put in place
+	 * to aggressively evict memory from the arc in order to avoid
+	 * memory fragmentation issues.
+	 */
+	if (zio_arena != NULL &&
+	    vmem_size(zio_arena, VMEM_FREE) <
+	    (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
 		return (1);
 #endif	/* sun */
-
-#else
+#else	/* _KERNEL */
 	if (spa_get_random(100) == 0)
 		return (1);
-#endif
+#endif	/* _KERNEL */
+	DTRACE_PROBE(arc__reclaim_no);
+
 	return (0);
 }
 
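Note: after this hunk the FreeBSD kernel-side decision reduces to three tests: an explicit needfree request from the VM, free pages below zfs_arc_free_target (replacing the vm_paging_needed() call), and, on i386 or platforms without UMA_MD_SMALL_ALLOC, less than a quarter of the kernel heap free. The kmem_used() > 3/4 kmem_size() test is gone entirely, and each FreeBSD-relevant exit now fires a DTrace probe. A condensed model of the new cascade, with the VM inputs passed in as plain parameters:

#include <stdint.h>
#include <stdio.h>

/* Model (not kernel code) of the patched arc_reclaim_needed(). */
static int
model_reclaim_needed(int needfree, uint64_t freemem_pages,
    uint64_t free_target, uint64_t heap_free, uint64_t heap_total)
{
	if (needfree)
		return (1);		/* the VM explicitly asked for memory */
	if (freemem_pages < free_target)
		return (1);		/* cooperate with the pagedaemon */
	if (heap_free < heap_total >> 2)
		return (1);		/* kernel heap over 3/4 allocated */
	return (0);
}

int
main(void)
{
	/* 32768 free pages against a 65536-page target: reclaim. */
	printf("%d\n", model_reclaim_needed(0, 32768, 65536, 512, 1024));
	/* Ample free pages and heap: leave the ARC alone. */
	printf("%d\n", model_reclaim_needed(0, 131072, 65536, 512, 1024));
	return (0);
}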
Lines 2564-2569 arc_kmem_reap_now(arc_reclaim_strategy_t strat)
 	}
 	kmem_cache_reap_now(buf_cache);
 	kmem_cache_reap_now(hdr_cache);
+
+#ifdef sun
+	/*
+	 * Ask the vmem arena to reclaim unused memory from its
+	 * quantum caches.
+	 */
+	if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
+		vmem_qcache_reap(zio_arena);
+#endif
 }
 
 static void
Lines 2713-2732 arc_evict_needed(arc_buf_contents_t type)
 	if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
 		return (1);
 
-#ifdef sun
-#ifdef _KERNEL
-	/*
-	 * If zio data pages are being allocated out of a separate heap segment,
-	 * then enforce that the size of available vmem for this area remains
-	 * above about 1/32nd free.
-	 */
-	if (type == ARC_BUFC_DATA && zio_arena != NULL &&
-	    vmem_size(zio_arena, VMEM_FREE) <
-	    (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
-		return (1);
-#endif
-#endif	/* sun */
-
 	if (arc_reclaim_needed())
 		return (1);
 
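Note: the 1/32nd zio_arena test removed here is not lost. An equivalent but stricter 1/16th version now lives in arc_reclaim_needed() (under #ifdef sun, see above), so the per-buffer-type path no longer needs its own copy. The practical difference in thresholds, worked through for a hypothetical 16 GiB arena:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t alloc = 16ULL << 30;	/* 16 GiB allocated from zio_arena */

	/* Old check in arc_evict_needed(): free < alloc/32. */
	printf("old: evict below %ju MiB free\n",
	    (uintmax_t)((alloc >> 5) >> 20));
	/* New check in arc_reclaim_needed(): free < alloc/16. */
	printf("new: reclaim below %ju MiB free\n",
	    (uintmax_t)((alloc >> 4) >> 20));
	return (0);
}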
Lines 3885-3904 static int
 arc_memory_throttle(uint64_t reserve, uint64_t txg)
 {
 #ifdef _KERNEL
-	uint64_t available_memory =
-	    ptoa((uintmax_t)cnt.v_free_count + cnt.v_cache_count);
+	uint64_t available_memory = ptob(freemem);
 	static uint64_t page_load = 0;
 	static uint64_t last_txg = 0;
 
-#ifdef sun
-#if defined(__i386)
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
 	available_memory =
-	    MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
+	    MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE)));
 #endif
-#endif	/* sun */
 
-	if (cnt.v_free_count + cnt.v_cache_count >
-	    (uint64_t)physmem * arc_lotsfree_percent / 100)
+	if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100)
 		return (0);
 
 	if (txg > last_txg) {
Lines 3911-3917 arc_memory_throttle(uint64_t reserve, uint64_t txg)
 	 * continue to let page writes occur as quickly as possible.
 	 */
 	if (curproc == pageproc) {
-		if (page_load > available_memory / 4)
+		if (page_load > MAX(ptob(minfree), available_memory) / 4)
 			return (SET_ERROR(ERESTART));
 		/* Note: reserve is inflated, so we deflate */
 		page_load += reserve / 8;
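Note: the throttle now keys off freemem and gains a floor: the pagedaemon is only told to back off (ERESTART) once its accumulated page_load exceeds a quarter of the larger of minfree-in-bytes and the memory currently available, so a momentarily tiny available_memory can no longer stall pageout outright. A standalone model of the check, assuming 4 KiB pages for ptob():

#include <stdint.h>
#include <stdio.h>

#define	MODEL_PAGE_SIZE	4096	/* assumption: 4 KiB pages */
#define	MODEL_MAX(a, b)	((a) > (b) ? (a) : (b))

/* Model (not kernel code) of the patched pageout throttle check. */
static int
model_throttle(uint64_t page_load, uint64_t minfree_pages,
    uint64_t available_bytes)
{
	uint64_t floor = MODEL_MAX(minfree_pages * MODEL_PAGE_SIZE,
	    available_bytes);

	return (page_load > floor / 4);	/* nonzero would mean ERESTART */
}

int
main(void)
{
	/* 1 MiB of page_load against a 64 MiB floor: keep writing. */
	printf("%d\n", model_throttle(1ULL << 20, 8192, 64ULL << 20));
	/* 32 MiB of page_load: throttle the pagedaemon. */
	printf("%d\n", model_throttle(32ULL << 20, 8192, 64ULL << 20));
	return (0);
}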
