(-)sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c (-7 lines)
Lines 133-145 kmem_size(void)
 	return (kmem_size_val);
 }
 
-uint64_t
-kmem_used(void)
-{
-
-	return (vmem_size(kmem_arena, VMEM_ALLOC));
-}
-
 static int
 kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
 {
(-)sys/cddl/compat/opensolaris/sys/kmem.h (-1 / +3 lines)
Lines 66-72 typedef struct kmem_cache {
 void *zfs_kmem_alloc(size_t size, int kmflags);
 void zfs_kmem_free(void *buf, size_t size);
 uint64_t kmem_size(void);
-uint64_t kmem_used(void);
 kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
     int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
     void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
Lines 78-83 void kmem_reap(void);
 int kmem_debugging(void);
 void *calloc(size_t n, size_t s);
 
+#define	freemem				(cnt.v_free_count + cnt.v_cache_count)
+#define	minfree				cnt.v_free_min
+#define	heap_arena			kmem_arena
 #define	kmem_alloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags))
 #define	kmem_zalloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags) | M_ZERO)
 #define	kmem_free(buf, size)		zfs_kmem_free((buf), (size))
(-)sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (-37 / +119 lines)
Lines 193-201 extern int zfs_prefetch_disable;
  */
 static boolean_t arc_warm;
 
-/*
- * These tunables are for performance analysis.
- */
 uint64_t zfs_arc_max;
 uint64_t zfs_arc_min;
 uint64_t zfs_arc_meta_limit = 0;
Lines 204-210 int zfs_arc_shrink_shift = 0;
 int zfs_arc_p_min_shift = 0;
 int zfs_disable_dup_eviction = 0;
 uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
+u_int zfs_arc_free_target = (1 << 16); /* default before pagedaemon init only */
+int zfs_arc_reclaim_cache_free = 1;
 
+static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS);
+
+#ifdef _KERNEL
+static void
+arc_free_target_init(void *unused __unused)
+{
+
+	zfs_arc_free_target = vm_pageout_wakeup_thresh;
+}
+SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
+    arc_free_target_init, NULL);
+
 TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
 TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
 TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
Lines 217-223 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_
 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN,
     &zfs_arc_average_blocksize, 0,
     "ARC average blocksize");
+SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_reclaim_cache_free, CTLFLAG_RWTUN,
+    &zfs_arc_reclaim_cache_free, 0,
+    "ARC treats cached pages as free blocksize");
+/*
+ * We don't have a tunable for arc_free_target due to the dependency on
+ * pagedaemon initialisation.
+ */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
+    CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int),
+    sysctl_vfs_zfs_arc_free_target, "IU",
+    "Desired number of free pages below which ARC triggers reclaim");
 
+static int
+sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS)
+{
+	u_int val;
+	int err;
+
+	val = zfs_arc_free_target;
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val < minfree)
+		return (EINVAL);
+	if (val > cnt.v_page_count)
+		return (EINVAL);
+
+	zfs_arc_free_target = val;
+
+	return (0);
+}
+#endif
+
 /*
  * Note that buffers can be in one of 6 states:
  *	ARC_anon	- anonymous (discussed below)
Lines 2421-2429 arc_flush(spa_t *spa)
 void
 arc_shrink(void)
 {
+
 	if (arc_c > arc_c_min) {
 		uint64_t to_free;
 
+		DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t,
+			arc_c_min);
 #ifdef _KERNEL
 		to_free = arc_c >> arc_shrink_shift;
 #else
Lines 2443-2450 arc_shrink(void)
 		ASSERT((int64_t)arc_p >= 0);
 	}
 
-	if (arc_size > arc_c)
+	if (arc_size > arc_c) {
+		DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size,
+			uint64_t, arc_c);
 		arc_adjust();
+	}
 }
 
 static int needfree = 0;
Lines 2452-2469 static int needfree = 0;
 static int
 arc_reclaim_needed(void)
 {
+	u_int fm;
 
 #ifdef _KERNEL
 
-	if (needfree)
+	if (needfree) {
+		DTRACE_PROBE(arc__reclaim_needfree);
 		return (1);
+	}
 
 	/*
 	 * Cooperate with pagedaemon when it's time for it to scan
 	 * and reclaim some pages.
 	 */
-	if (vm_paging_needed())
+	if (zfs_arc_reclaim_cache_free == 0)
+		fm = cnt.v_free_count;
+	else
+		fm = freemem;
+
+	if (fm < zfs_arc_free_target) {
+		DTRACE_PROBE3(arc__reclaim_freemem, uint64_t,
+		    fm, uint64_t, zfs_arc_free_target,
+		    int, zfs_arc_reclaim_cache_free);
 		return (1);
+	}
 
 #ifdef sun
 	/*
Lines 2491-2498 arc_reclaim_needed(void)
 	if (availrmem < swapfs_minfree + swapfs_reserve + extra)
 		return (1);
 
-#if defined(__i386)
 	/*
+	 * Check that we have enough availrmem that memory locking (e.g., via
+	 * mlock(3C) or memcntl(2)) can still succeed.  (pages_pp_maximum
+	 * stores the number of pages that cannot be locked; when availrmem
+	 * drops below pages_pp_maximum, page locking mechanisms such as
+	 * page_pp_lock() will fail.)
+	 */
+	if (availrmem <= pages_pp_maximum)
+		return (1);
+
+#endif	/* sun */
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
+	/*
 	 * If we're on an i386 platform, it's possible that we'll exhaust the
 	 * kernel heap space before we ever run out of available physical
 	 * memory.  Most checks of the size of the heap_area compare against
Lines 2503-2521 arc_reclaim_needed(void)
 	 * heap is allocated.  (Or, in the calculation, if less than 1/4th is
 	 * free)
 	 */
-	if (btop(vmem_size(heap_arena, VMEM_FREE)) <
-	    (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
+	if (vmem_size(heap_arena, VMEM_FREE) <
+	    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) {
+		DTRACE_PROBE2(arc__reclaim_used, uint64_t,
+		    vmem_size(heap_arena, VMEM_FREE), uint64_t,
+		    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2);
 		return (1);
+	}
 #endif
-#else	/* !sun */
-	if (kmem_used() > (kmem_size() * 3) / 4)
+#ifdef sun
+	/*
+	 * If zio data pages are being allocated out of a separate heap segment,
+	 * then enforce that the size of available vmem for this arena remains
+	 * above about 1/16th free.
+	 *
+	 * Note: The 1/16th arena free requirement was put in place
+	 * to aggressively evict memory from the arc in order to avoid
+	 * memory fragmentation issues.
+	 */
+	if (zio_arena != NULL &&
+	    vmem_size(zio_arena, VMEM_FREE) <
+	    (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
 		return (1);
 #endif	/* sun */
-
-#else
+#else	/* _KERNEL */
 	if (spa_get_random(100) == 0)
 		return (1);
-#endif
+#endif	/* _KERNEL */
+	DTRACE_PROBE(arc__reclaim_no);
+
 	return (0);
 }
 
Lines 2529-2534 arc_kmem_reap_now(arc_reclaim_strategy_t strat)
 	kmem_cache_t		*prev_cache = NULL;
 	kmem_cache_t		*prev_data_cache = NULL;
 
+	DTRACE_PROBE(arc__kmem_reap_start);
 #ifdef _KERNEL
 	if (arc_meta_used >= arc_meta_limit) {
 		/*
Lines 2564-2569 arc_kmem_reap_now(arc_reclaim_strategy_t strat)
 	}
 	kmem_cache_reap_now(buf_cache);
 	kmem_cache_reap_now(hdr_cache);
+
+#ifdef sun
+	/*
+	 * Ask the vmem areana to reclaim unused memory from its
+	 * quantum caches.
+	 */
+	if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
+		vmem_qcache_reap(zio_arena);
+#endif
+	DTRACE_PROBE(arc__kmem_reap_end);
 }
 
 static void
Lines 2713-2732 arc_evict_needed(arc_buf_contents_t type)
 	if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
 		return (1);
 
-#ifdef sun
-#ifdef _KERNEL
-	/*
-	 * If zio data pages are being allocated out of a separate heap segment,
-	 * then enforce that the size of available vmem for this area remains
-	 * above about 1/32nd free.
-	 */
-	if (type == ARC_BUFC_DATA && zio_arena != NULL &&
-	    vmem_size(zio_arena, VMEM_FREE) <
-	    (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
-		return (1);
-#endif
-#endif	/* sun */
-
 	if (arc_reclaim_needed())
 		return (1);
 
Lines 3885-3904 static int
 arc_memory_throttle(uint64_t reserve, uint64_t txg)
 {
 #ifdef _KERNEL
-	uint64_t available_memory =
-	    ptoa((uintmax_t)cnt.v_free_count + cnt.v_cache_count);
+	uint64_t available_memory = ptob(freemem);
 	static uint64_t page_load = 0;
 	static uint64_t last_txg = 0;
 
-#ifdef sun
-#if defined(__i386)
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
 	available_memory =
-	    MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
+	    MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE)));
 #endif
-#endif	/* sun */
 
-	if (cnt.v_free_count + cnt.v_cache_count >
-	    (uint64_t)physmem * arc_lotsfree_percent / 100)
+	if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100)
 		return (0);
 
 	if (txg > last_txg) {
Lines 3911-3917 arc_memory_throttle(uint64_t reserve, uint64_t txg
 	 * continue to let page writes occur as quickly as possible.
 	 */
 	if (curproc == pageproc) {
-		if (page_load > available_memory / 4)
+		if (page_load > MAX(ptob(minfree), available_memory) / 4)
 			return (SET_ERROR(ERESTART));
 		/* Note: reserve is inflated, so we deflate */
 		page_load += reserve / 8;
(-)sys/vm/vm_pageout.c (-7 / +18 lines)
Lines 115-124 __FBSDID("$FreeBSD$");
 
 /* the kernel process "vm_pageout"*/
 static void vm_pageout(void);
+static void vm_pageout_init(void);
 static int vm_pageout_clean(vm_page_t);
 static void vm_pageout_scan(struct vm_domain *vmd, int pass);
 static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
 
+SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init,
+    NULL);
+
 struct proc *pageproc;
 
 static struct kproc_desc page_kp = {
Lines 126-132 static struct kproc_desc page_kp = {
 	vm_pageout,
 	&pageproc
 };
-SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start,
+SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start,
     &page_kp);
 
 #if !defined(NO_SWAPPING)
Lines 1650-1664 vm_pageout_worker(void *arg)
 }
 
 /*
- *	vm_pageout is the high level pageout daemon.
+ *	vm_pageout_init initialises basic pageout daemon settings.
  */
 static void
-vm_pageout(void)
+vm_pageout_init(void)
 {
-#if MAXMEMDOM > 1
-	int error, i;
-#endif
-
 	/*
 	 * Initialize some paging parameters.
 	 */
Lines 1704-1710 static void
 	/* XXX does not really belong here */
 	if (vm_page_max_wired == 0)
 		vm_page_max_wired = cnt.v_free_count / 3;
-
+}
+
+/*
+ *     vm_pageout is the high level pageout daemon.
+ */
+static void
+vm_pageout(void)
+{
+#if MAXMEMDOM > 1
+	int error, i;
+#endif
+
 	swap_pager_swap_init();
 #if MAXMEMDOM > 1
 	for (i = 1; i < vm_ndomains; i++) {
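
Usage note (not part of the patch itself): the knobs added to arc.c above surface as ordinary sysctl(8) OIDs on a kernel built with this change. The lines below are a hypothetical session using only the names and bounds visible in the diff; defaults on a real system may differ.

  sysctl vfs.zfs.arc_free_target           # free-page threshold below which the ARC starts reclaiming
  sysctl vfs.zfs.arc_free_target=65536     # the handler rejects values below minfree or above cnt.v_page_count with EINVAL
  sysctl vfs.zfs.arc_reclaim_cache_free=0  # count only v_free_count, not cached pages, as free memory

As the comment in the diff notes, vfs.zfs.arc_free_target is deliberately not a loader tunable because it depends on pagedaemon initialisation; it is seeded from vm_pageout_wakeup_thresh by the SI_SUB_KTHREAD_PAGE SYSINIT and can only be changed at runtime.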
