(-)sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c (-7 lines)
Lines 133-145 kmem_size(void)
 	return (kmem_size_val);
 }
 
-uint64_t
-kmem_used(void)
-{
-
-	return (vmem_size(kmem_arena, VMEM_ALLOC));
-}
-
 static int
 kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
 {
(-)sys/cddl/compat/opensolaris/sys/kmem.h (-1 / +3 lines)
Lines 66-72 typedef struct kmem_cache {
 void *zfs_kmem_alloc(size_t size, int kmflags);
 void zfs_kmem_free(void *buf, size_t size);
 uint64_t kmem_size(void);
-uint64_t kmem_used(void);
 kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
     int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
     void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
Lines 78-83 void kmem_reap(void);
 int kmem_debugging(void);
 void *calloc(size_t n, size_t s);
 
+#define	freemem				(cnt.v_free_count + cnt.v_cache_count)
+#define	minfree				cnt.v_free_min
+#define	heap_arena			kmem_arena
 #define	kmem_alloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags))
 #define	kmem_zalloc(size, kmflags)	zfs_kmem_alloc((size), (kmflags) | M_ZERO)
 #define	kmem_free(buf, size)		zfs_kmem_free((buf), (size))
(-)sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (-53 / +199 lines)
Lines 138-143
 #include <sys/sdt.h>
 
 #include <vm/vm_pageout.h>
+#include <machine/vmparam.h>
 
 #ifdef illumos
 #ifndef _KERNEL
Lines 188-201 int arc_lotsfree_percent = 10;
 static int arc_dead;
 extern int zfs_prefetch_disable;
 
+/* 
+ * KD 2014-09-22
+ * We have to be able to test for UIO use inside the arc allocator.
+ * NOTE: DO NOT MODIFY HERE!
+ */
+extern int zio_use_uma;
+
 /*
  * The arc has filled available memory and has now warmed up.
  */
 static boolean_t arc_warm;
 
-/*
- * These tunables are for performance analysis.
- */
 uint64_t zfs_arc_max;
 uint64_t zfs_arc_min;
 uint64_t zfs_arc_meta_limit = 0;
Lines 204-210 int zfs_arc_shrink_shift = 0;
 int zfs_arc_p_min_shift = 0;
 int zfs_disable_dup_eviction = 0;
 uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
+u_int zfs_arc_free_target = (1 << 16); /* default before pagedaemon init only */
+int zfs_arc_reclaim_cache_free = 1;
 
+static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS);
+
+#ifdef _KERNEL
+static void
+arc_free_target_init(void *unused __unused)
+{
+	zfs_arc_free_target = vm_pageout_wakeup_thresh + ((cnt.v_free_target - vm_pageout_wakeup_thresh) / 2);
+}
+SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
+    arc_free_target_init, NULL);
+
 TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
 TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
 TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit);
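The SYSINIT above replaces the compile-time placeholder of 65536 pages with a value computed once the pagedaemon's thresholds exist: the midpoint between the pageout wakeup threshold and the free-page target. A small standalone sketch of that arithmetic (not part of the patch; the sample numbers are hypothetical):

/* Mirrors arc_free_target_init(): midpoint between the pagedaemon wakeup
 * threshold and its free-page target, both expressed in pages. */
static unsigned int
default_arc_free_target(unsigned int wakeup_thresh, unsigned int free_target)
{
	return (wakeup_thresh + (free_target - wakeup_thresh) / 2);
}

/*
 * Example with made-up numbers: wakeup_thresh = 21000 pages and
 * free_target = 84000 pages give 52500 pages, i.e. roughly 205 MB of free
 * memory at 4 KiB per page, below which the ARC begins giving memory back.
 */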
Lines 217-223 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_
 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN,
     &zfs_arc_average_blocksize, 0,
     "ARC average blocksize");
+SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_reclaim_cache_free, CTLFLAG_RWTUN,
+    &zfs_arc_reclaim_cache_free, 0,
+    "ARC treats cached pages as free blocksize");
+/*
+ * We don't have a tunable for arc_free_target due to the dependency on
+ * pagedaemon initialisation.
+ */
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
+    CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int),
+    sysctl_vfs_zfs_arc_free_target, "IU",
+    "Desired number of free pages below which ARC triggers reclaim");
 
+static int
+sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS)
+{
+	u_int val;
+	int err;
+
+	val = zfs_arc_free_target;
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if (val < minfree)
+		return (EINVAL);
+	if (val > cnt.v_page_count)
+		return (EINVAL);
+
+	zfs_arc_free_target = val;
+
+	return (0);
+}
+#endif
+
 /*
  * Note that buffers can be in one of 6 states:
  *	ARC_anon	- anonymous (discussed below)
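Because vfs.zfs.arc_free_target is exported through a SYSCTL_PROC handler rather than a plain tunable, it can be inspected and adjusted at runtime. A small userland sketch of how that might look (not part of the patch; the error handling only illustrates the EINVAL bounds enforced by the handler above):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_int target;
	size_t len = sizeof(target);

	/* Read the current target; fails with ENOENT on an unpatched kernel. */
	if (sysctlbyname("vfs.zfs.arc_free_target", &target, &len, NULL, 0) == -1) {
		perror("read vfs.zfs.arc_free_target");
		return (1);
	}
	printf("arc_free_target = %u pages\n", target);

	/* Raise it slightly; values below minfree or above the page count
	 * are rejected with EINVAL by the handler. */
	target += 1024;
	if (sysctlbyname("vfs.zfs.arc_free_target", NULL, NULL,
	    &target, sizeof(target)) == -1)
		perror("write vfs.zfs.arc_free_target");
	return (0);
}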
Lines 2421-2426 arc_flush(spa_t *spa)
 void
 arc_shrink(void)
 {
+
 	if (arc_c > arc_c_min) {
 		uint64_t to_free;
 
Lines 2429-2434 arc_shrink(void)
 #else
 		to_free = arc_c >> arc_shrink_shift;
 #endif
+		DTRACE_PROBE4(arc__shrink, uint64_t, arc_c, uint64_t,
+			arc_c_min, uint64_t, arc_p, uint64_t, to_free);
+
 		if (arc_c > arc_c_min + to_free)
 			atomic_add_64(&arc_c, -to_free);
 		else
Lines 2439-2450 arc_shrink(void)
 			arc_c = MAX(arc_size, arc_c_min);
 		if (arc_p > arc_c)
 			arc_p = (arc_c >> 1);
+
+		DTRACE_PROBE2(arc__shrunk, uint64_t, arc_c, uint64_t,
+			arc_p);
+
 		ASSERT(arc_c >= arc_c_min);
 		ASSERT((int64_t)arc_p >= 0);
 	}
 
-	if (arc_size > arc_c)
+	if (arc_size > arc_c) {
+		DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size,
+			uint64_t, arc_c);
 		arc_adjust();
+	}
 }
 
 static int needfree = 0;
Lines 2452-2469 static int needfree = 0;
 static int
 arc_reclaim_needed(void)
 {
+	u_int fm;
 
 #ifdef _KERNEL
+	if (arc_size <= arc_c_min) {
+		DTRACE_PROBE2(arc__reclaim_min, uint64_t, arc_size,
+		    uint64_t, arc_c_min);
+		return (0);
+	}
 
-	if (needfree)
+	if (needfree) {
+		DTRACE_PROBE(arc__reclaim_needfree);
 		return (1);
+	}
 
 	/*
 	 * Cooperate with pagedaemon when it's time for it to scan
 	 * and reclaim some pages.
 	 */
-	if (vm_paging_needed())
+	if (zfs_arc_reclaim_cache_free == 0)
+		fm = cnt.v_free_count;
+	else
+		fm = freemem;
+
+	if (fm < zfs_arc_free_target) {
+		DTRACE_PROBE3(arc__reclaim_freemem, uint64_t,
+		    fm, uint64_t, zfs_arc_free_target,
+		    int, zfs_arc_reclaim_cache_free);
 		return (1);
+	}
 
 #ifdef sun
 	/*
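The net effect of this hunk is that instead of waiting for vm_paging_needed(), the ARC compares the same page counters the pagedaemon uses against zfs_arc_free_target, optionally counting the cache queue as free via the freemem compat macro added in kmem.h. A condensed, self-contained restatement (not part of the patch; the helper and parameter names are hypothetical):

/* Returns non-zero when the ARC should start shrinking.  The parameters map
 * to cnt.v_free_count, cnt.v_cache_count, zfs_arc_free_target and
 * zfs_arc_reclaim_cache_free in the patch. */
static int
arc_mem_pressure(unsigned int free_pages, unsigned int cache_pages,
    unsigned int free_target, int count_cache_as_free)
{
	unsigned int fm;

	fm = count_cache_as_free ? free_pages + cache_pages : free_pages;
	return (fm < free_target);
}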
Lines 2491-2498 arc_reclaim_needed(void)
 	if (availrmem < swapfs_minfree + swapfs_reserve + extra)
 		return (1);
 
-#if defined(__i386)
 	/*
+	 * Check that we have enough availrmem that memory locking (e.g., via
+	 * mlock(3C) or memcntl(2)) can still succeed.  (pages_pp_maximum
+	 * stores the number of pages that cannot be locked; when availrmem
+	 * drops below pages_pp_maximum, page locking mechanisms such as
+	 * page_pp_lock() will fail.)
+	 */
+	if (availrmem <= pages_pp_maximum)
+		return (1);
+
+#endif	/* sun */
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
+	/*
 	 * If we're on an i386 platform, it's possible that we'll exhaust the
 	 * kernel heap space before we ever run out of available physical
 	 * memory.  Most checks of the size of the heap_area compare against
Lines 2503-2534 arc_reclaim_needed(void)
 	 * heap is allocated.  (Or, in the calculation, if less than 1/4th is
 	 * free)
 	 */
-	if (btop(vmem_size(heap_arena, VMEM_FREE)) <
-	    (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
+	if (vmem_size(heap_arena, VMEM_FREE) <
+	    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2)) {
+		DTRACE_PROBE2(arc__reclaim_used, uint64_t,
+		    vmem_size(heap_arena, VMEM_FREE), uint64_t,
+		    (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2);
 		return (1);
+	}
 #endif
-#else	/* !sun */
-	if (kmem_used() > (kmem_size() * 3) / 4)
+#ifdef sun
+	/*
+	 * If zio data pages are being allocated out of a separate heap segment,
+	 * then enforce that the size of available vmem for this arena remains
+	 * above about 1/16th free.
+	 *
+	 * Note: The 1/16th arena free requirement was put in place
+	 * to aggressively evict memory from the arc in order to avoid
+	 * memory fragmentation issues.
+	 */
+	if (zio_arena != NULL &&
+	    vmem_size(zio_arena, VMEM_FREE) <
+	    (vmem_size(zio_arena, VMEM_ALLOC) >> 4))
 		return (1);
 #endif	/* sun */
-
-#else
+#else	/* _KERNEL */
 	if (spa_get_random(100) == 0)
 		return (1);
-#endif
+#endif	/* _KERNEL */
+	DTRACE_PROBE(arc__reclaim_no);
+
 	return (0);
 }
 
 extern kmem_cache_t	*zio_buf_cache[];
 extern kmem_cache_t	*zio_data_buf_cache[];
 
-static void
-arc_kmem_reap_now(arc_reclaim_strategy_t strat)
+static void __used
+reap_arc_caches()
 {
-	size_t			i;
+	size_t		i;
 	kmem_cache_t		*prev_cache = NULL;
 	kmem_cache_t		*prev_data_cache = NULL;
 
+	for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
+		if (zio_buf_cache[i] != prev_cache) {
+			prev_cache = zio_buf_cache[i];
+			kmem_cache_reap_now(zio_buf_cache[i]);
+		}
+		if (zio_data_buf_cache[i] != prev_data_cache) {
+			prev_data_cache = zio_data_buf_cache[i];
+			kmem_cache_reap_now(zio_data_buf_cache[i]);
+		}
+	}
+	kmem_cache_reap_now(buf_cache);
+	kmem_cache_reap_now(hdr_cache);
+}
+
+static void __used
+arc_kmem_reap_now(arc_reclaim_strategy_t strat)
+{
+
+	DTRACE_PROBE(arc__kmem_reap_start);
 #ifdef _KERNEL
 	if (arc_meta_used >= arc_meta_limit) {
 		/*
Lines 2552-2569 extern kmem_cache_t *zio_data_buf_cache[];
 	if (strat == ARC_RECLAIM_AGGR)
 		arc_shrink();
 
-	for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
-		if (zio_buf_cache[i] != prev_cache) {
-			prev_cache = zio_buf_cache[i];
-			kmem_cache_reap_now(zio_buf_cache[i]);
-		}
-		if (zio_data_buf_cache[i] != prev_data_cache) {
-			prev_data_cache = zio_data_buf_cache[i];
-			kmem_cache_reap_now(zio_data_buf_cache[i]);
-		}
-	}
-	kmem_cache_reap_now(buf_cache);
-	kmem_cache_reap_now(hdr_cache);
+	reap_arc_caches();
+
+#ifdef sun
+	/*
+	 * Ask the vmem areana to reclaim unused memory from its
+	 * quantum caches.
+	 */
+	if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
+		vmem_qcache_reap(zio_arena);
+#endif
+	DTRACE_PROBE(arc__kmem_reap_end);
 }
 
 static void
Lines 2572-2586 arc_reclaim_thread(void *dummy __unused)
 	clock_t			growtime = 0;
 	arc_reclaim_strategy_t	last_reclaim = ARC_RECLAIM_CONS;
 	callb_cpr_t		cpr;
+	int			autoreap = 0;
 
 	CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG);
 
 	mutex_enter(&arc_reclaim_thr_lock);
+
 	while (arc_thread_exit == 0) {
+
+#ifdef	_KERNEL
+/* KD 2014-09-22
+ * Protect against UMA free memory bloat.  We already do this on a low-memory
+ * basis in the allocator; it has to happen there rather than here due to 
+ * response time considerations.  Make the call here once every 10 passes as 
+ * well; this reclaims unused UMA buffers every 10 seconds on an idle system 
+ * and more frequently if the reclaim thread gets woken up by low RAM 
+ * conditions.
+ */
+		if ((zio_use_uma) && (autoreap++ == 10)) {
+			autoreap = 0;
+			DTRACE_PROBE(arc__reclaim_timed_reap);
+			reap_arc_caches();
+		}
+#endif	/* _KERNEL */
+
 		if (arc_reclaim_needed()) {
 
 			if (arc_no_grow) {
 				if (last_reclaim == ARC_RECLAIM_CONS) {
+					DTRACE_PROBE(arc__reclaim_aggr_no_grow);
 					last_reclaim = ARC_RECLAIM_AGGR;
 				} else {
 					last_reclaim = ARC_RECLAIM_CONS;
Lines 2588-2593 arc_reclaim_thread(void *dummy __unused)
 			} else {
 				arc_no_grow = TRUE;
 				last_reclaim = ARC_RECLAIM_AGGR;
+				DTRACE_PROBE(arc__reclaim_aggr);
 				membar_producer();
 			}
 
Lines 2602-2607 arc_reclaim_thread(void *dummy __unused)
 				 */
 				arc_no_grow = TRUE;
 				last_reclaim = ARC_RECLAIM_AGGR;
+				DTRACE_PROBE(arc__reclaim_aggr_needfree);
 			}
 			arc_kmem_reap_now(last_reclaim);
 			arc_warm = B_TRUE;
Lines 2618-2623 arc_reclaim_thread(void *dummy __unused)
 #ifdef _KERNEL
 		if (needfree) {
 			needfree = 0;
+			DTRACE_PROBE(arc__clear_needfree);
 			wakeup(&needfree);
 		}
 #endif
Lines 2692-2697 arc_adapt(int bytes, arc_state_t *state)
 	 * cache size, increment the target cache size
 	 */
 	if (arc_size > arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
+		DTRACE_PROBE1(arc__inc_adapt, int, bytes);
 		atomic_add_64(&arc_c, (int64_t)bytes);
 		if (arc_c > arc_c_max)
 			arc_c = arc_c_max;
Lines 2713-2732 arc_evict_needed(arc_buf_contents_t type)
 	if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
 		return (1);
 
-#ifdef sun
-#ifdef _KERNEL
-	/*
-	 * If zio data pages are being allocated out of a separate heap segment,
-	 * then enforce that the size of available vmem for this area remains
-	 * above about 1/32nd free.
-	 */
-	if (type == ARC_BUFC_DATA && zio_arena != NULL &&
-	    vmem_size(zio_arena, VMEM_FREE) <
-	    (vmem_size(zio_arena, VMEM_ALLOC) >> 5))
-		return (1);
-#endif
-#endif	/* sun */
-
 	if (arc_reclaim_needed())
 		return (1);
 
Lines 2807-2812 arc_get_data_buf(arc_buf_t *buf)
 			arc_space_consume(size, ARC_SPACE_DATA);
 		} else {
 			ASSERT(type == ARC_BUFC_DATA);
+#ifdef	_KERNEL
+/* KD 2014-09-22
+ * It would be nice if we could leave this to the arc_reclaim thread.
+ * Unfortunately we cannot; the test has to be done here as well, because
+ * under heavy I/O demand we can grab enough RAM fast enough to induce
+ * nasty oscillation problems.  Fortunately we only need to call this when
+ * the system is under reasonably-severe memory stress.
+ */
+			if (zio_use_uma && (ptob(cnt.v_free_count) + size < ptob(cnt.v_free_target))) {
+				DTRACE_PROBE3(arc__alloc_lowmem_reap, int, cnt.v_free_count, int, size, int, cnt.v_free_target);
+				reap_arc_caches();
+			}
+#endif	/* _KERNEL */
 			buf->b_data = zio_data_buf_alloc(size);
 			ARCSTAT_INCR(arcstat_data_size, size);
 			atomic_add_64(&arc_size, size);
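The comment explains why the check cannot be left to the reclaim thread; note that the test only fires once free memory has fallen below the free-page target by more than the requested allocation itself. A worked restatement with made-up numbers (not part of the patch; the helper name and page size are hypothetical):

#include <stdint.h>

#define PAGE_BYTES	4096	/* hypothetical 4 KiB page size */

/* True when free memory is short of the target by more than this allocation,
 * mirroring ptob(cnt.v_free_count) + size < ptob(cnt.v_free_target). */
static int
should_reap_before_alloc(uint64_t free_pages, uint64_t free_target_pages,
    uint64_t alloc_bytes)
{
	return (free_pages * PAGE_BYTES + alloc_bytes <
	    free_target_pages * PAGE_BYTES);
}

/*
 * Example: with free_target_pages = 100000, a 1 MiB buffer (256 pages)
 * triggers a reap only once free_pages has dropped below 99744.
 */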
Lines 3885-3904 static int
 arc_memory_throttle(uint64_t reserve, uint64_t txg)
 {
 #ifdef _KERNEL
-	uint64_t available_memory =
-	    ptoa((uintmax_t)cnt.v_free_count + cnt.v_cache_count);
+	uint64_t available_memory = ptob(freemem);
 	static uint64_t page_load = 0;
 	static uint64_t last_txg = 0;
 
-#ifdef sun
-#if defined(__i386)
+#if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC)
 	available_memory =
-	    MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
+	    MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE)));
 #endif
-#endif	/* sun */
 
-	if (cnt.v_free_count + cnt.v_cache_count >
-	    (uint64_t)physmem * arc_lotsfree_percent / 100)
+	if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100)
 		return (0);
 
 	if (txg > last_txg) {
Lines 3911-3917 arc_memory_throttle(uint64_t reserve, uint64_t txg
 	 * continue to let page writes occur as quickly as possible.
 	 */
 	if (curproc == pageproc) {
-		if (page_load > available_memory / 4)
+		if (page_load > MAX(ptob(minfree), available_memory) / 4)
 			return (SET_ERROR(ERESTART));
 		/* Note: reserve is inflated, so we deflate */
 		page_load += reserve / 8;
Lines 3939-3946 arc_tempreserve_space(uint64_t reserve, uint64_t t
 	int error;
 	uint64_t anon_size;
 
-	if (reserve > arc_c/4 && !arc_no_grow)
+	if (reserve > arc_c/4 && !arc_no_grow) {
 		arc_c = MIN(arc_c_max, reserve * 4);
+		DTRACE_PROBE1(arc__set_reserve, uint64_t, arc_c);
+	}
 	if (reserve > arc_c)
 		return (SET_ERROR(ENOMEM));
 
Lines 3994-3999 arc_lowmem(void *arg __unused, int howto __unused)
 	mutex_enter(&arc_lowmem_lock);
 	mutex_enter(&arc_reclaim_thr_lock);
 	needfree = 1;
+	DTRACE_PROBE(arc__needfree);
 	cv_signal(&arc_reclaim_thr_cv);
 
 	/*
(-)sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c (+25 lines)
Lines 42-47
 typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
     uint64_t arg1, uint64_t arg2);
 
+extern	int	zio_use_uma;	/* Needs to be visible; DO NOT MODIFY! */
 
 dmu_tx_t *
 dmu_tx_create_dd(dsl_dir_t *dd)
Lines 1281-1286 dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
 	if (txg_how == TXG_WAITED)
 		tx->tx_waited = B_TRUE;
 
+#ifdef	_KERNEL
+	/*
+ 	 * KD 2014-09-22
+ 	 * If UMA is enabled it can only return a previously-used block
+	 * of identical size to what it had out before.  If it's not the
+	 * same size it will allocate a new one.  This is a problem because
+	 * dirty_data_max is the total dirty write data allowed out at any 
+	 * given time, but with UMA on that can multiply by the number of 
+	 * different block sizes (!!) requested in terms of free RAM that 
+	 * is left allocated but unused.  For this reason never allow 
+	 * dirty_data_max to exceed the difference between the paging 
+	 * threshold and the current free memory, with a minimum of 256MB.
+	 * This throttles "burst" allocations and prevents the system from 
+	 * choking during times of high write I/O demand.
+ 	 */
+	if (zio_use_uma) {
+		zfs_dirty_data_max = 1 << 28;
+		zfs_dirty_data_max = MAX(zfs_dirty_data_max, ptob(cnt.v_free_count - cnt.v_free_target));
+		zfs_dirty_data_max = MIN(zfs_dirty_data_max, ptob(physmem) * zfs_dirty_data_max_percent / 100);
+		zfs_dirty_data_max = MIN(zfs_dirty_data_max, zfs_dirty_data_max_max);
+		DTRACE_PROBE1(dmu__tx_dirty, uint64_t, zfs_dirty_data_max / (1024 * 1024));
+	}
+#endif	/* _KERNEL */
+
 	while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
 		dmu_tx_unassign(tx);
 
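The comment block above carries the reasoning; the arithmetic itself reduces to a clamp between a 256MB floor, the headroom over the paging threshold, the percent-of-RAM cap and the absolute cap. A standalone sketch (not part of the patch; the helper name and sample numbers are hypothetical, and an explicit underflow guard is added that the patch leaves to context):

#include <stdint.h>

#define DMAX(a, b)	((a) > (b) ? (a) : (b))
#define DMIN(a, b)	((a) < (b) ? (a) : (b))

/* Returns the clamped dirty-data limit in bytes. */
static uint64_t
clamp_dirty_data_max(uint64_t free_pages, uint64_t free_target_pages,
    uint64_t physmem_pages, uint64_t page_bytes,
    uint64_t dirty_max_percent, uint64_t dirty_max_max)
{
	uint64_t v = UINT64_C(1) << 28;		/* 256MB floor, as in the patch */

	if (free_pages > free_target_pages)	/* headroom over the paging threshold */
		v = DMAX(v, (free_pages - free_target_pages) * page_bytes);
	v = DMIN(v, physmem_pages * page_bytes * dirty_max_percent / 100);
	v = DMIN(v, dirty_max_max);
	return (v);
}

/*
 * Example: 4 KiB pages, 2,000,000 pages free and a free target of 500,000
 * pages give about 5.7 GiB of headroom, so the percent and absolute caps
 * end up deciding the limit.
 */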
(-)sys/vm/vm_pageout.c (-8 / +27 lines)
Lines 76-81
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
+#include "opt_kdtrace.h"
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
Lines 89-94 __FBSDID("$FreeBSD$");
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
+#include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/smp.h>
 #include <sys/vnode.h>
Lines 115-124 __FBSDID("$FreeBSD$");
 
 /* the kernel process "vm_pageout"*/
 static void vm_pageout(void);
+static void vm_pageout_init(void);
 static int vm_pageout_clean(vm_page_t);
 static void vm_pageout_scan(struct vm_domain *vmd, int pass);
 static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
 
+SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init,
+    NULL);
+
 struct proc *pageproc;
 
 static struct kproc_desc page_kp = {
Lines 126-134 static struct kproc_desc page_kp = {
 	vm_pageout,
 	&pageproc
 };
-SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start,
+SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start,
     &page_kp);
 
+SDT_PROVIDER_DEFINE(vm);
+SDT_PROBE_DEFINE(vm, , , vm__lowmem_cache);
+SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan);
+
 #if !defined(NO_SWAPPING)
 /* the kernel process "vm_daemon"*/
 static void vm_daemon(void);
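Moving the pagedaemon kproc from SI_ORDER_FIRST to SI_ORDER_SECOND lets the new vm_pageout_init SYSINIT (SI_ORDER_FIRST, added above) fill in the paging targets before the daemon starts; arc_free_target_init in arc.c, which runs at SI_ORDER_ANY in the same SI_SUB_KTHREAD_PAGE subsystem, then presumably reads an initialised cnt.v_free_target. A toy illustration of how SI_ORDER_* serialises init hooks within one subsystem (hypothetical function names, not part of the patch):

#include <sys/param.h>
#include <sys/kernel.h>

static void
compute_targets(void *arg __unused)
{
	/* SI_ORDER_FIRST: fill in thresholds (cf. vm_pageout_init). */
}
SYSINIT(compute_targets, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, compute_targets, NULL);

static void
start_daemon(void *arg __unused)
{
	/* SI_ORDER_SECOND: start the kproc (cf. the pagedaemon SYSINIT). */
}
SYSINIT(start_daemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, start_daemon, NULL);

static void
consume_targets(void *arg __unused)
{
	/* SI_ORDER_ANY: runs last in this subsystem (cf. arc_free_target_init). */
}
SYSINIT(consume_targets, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, consume_targets, NULL);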
Lines 663-668 vm_pageout_grow_cache(int tries, vm_paddr_t low, v
 		 * may acquire locks and/or sleep, so they can only be invoked
 		 * when "tries" is greater than zero.
 		 */
+		SDT_PROBE0(vm, , , vm__lowmem_cache);
 		EVENTHANDLER_INVOKE(vm_lowmem, 0);
 
 		/*
Lines 904-910 vm_pageout_map_deactivate_pages(map, desired)
  *	pass 1 - Move inactive to cache or free
  *	pass 2 - Launder dirty pages
  */
-static void
+static void __used
 vm_pageout_scan(struct vm_domain *vmd, int pass)
 {
 	vm_page_t m, next;
Lines 925-930 vm_pageout_scan(struct vm_domain *vmd, int pass)
 		/*
 		 * Decrease registered cache sizes.
 		 */
+		SDT_PROBE0(vm, , , vm__lowmem_scan);
 		EVENTHANDLER_INVOKE(vm_lowmem, 0);
 		/*
 		 * We do this explicitly after the caches have been
Lines 1650-1664 vm_pageout_worker(void *arg)
 }
 
 /*
- *	vm_pageout is the high level pageout daemon.
+ *	vm_pageout_init initialises basic pageout daemon settings.
  */
 static void
-vm_pageout(void)
+vm_pageout_init(void)
 {
-#if MAXMEMDOM > 1
-	int error, i;
-#endif
-
 	/*
 	 * Initialize some paging parameters.
 	 */
Lines 1704-1710 static void
 	/* XXX does not really belong here */
 	if (vm_page_max_wired == 0)
 		vm_page_max_wired = cnt.v_free_count / 3;
+}
 
+/*
+ *     vm_pageout is the high level pageout daemon.
+ */
+static void
+vm_pageout(void)
+{
+#if MAXMEMDOM > 1
+	int error, i;
+#endif
+
 	swap_pager_swap_init();
 #if MAXMEMDOM > 1
 	for (i = 1; i < vm_ndomains; i++) {
