(-)cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (-16 / +156 lines)
@@ -362,7 +362,16 @@ int zfs_arc_no_grow_shift = 0;
 int zfs_arc_p_min_shift = 0;
 uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
 u_int zfs_arc_free_target = 0;
+u_int zfs_arc_wakeup_pager = 0;
+u_int zfs_arc_wakeup_delay = 500;	/* 500ms between pager wakeups min */
 
+#define	WAKE_PAGER
+#ifdef	WAKE_PAGER
+#define	FREE_TARGET_CONSTANT	10 / 8  /* Target above pageout_wakeup_thresh */
+static int	arc_init_done = 0;      /* After arc_warm is valid */
+extern void	pagedaemon_wakeup(void);
+#endif
+
 /* Absolute min for arc min / max is 16MB. */
 static uint64_t arc_abs_min = 16 << 20;
 
@@ -379,7 +388,9 @@ static void
 arc_free_target_init(void *unused __unused)
 {
 
-	zfs_arc_free_target = vm_pageout_wakeup_thresh;
+	zfs_arc_free_target = vm_pageout_wakeup_thresh + (vm_pageout_wakeup_thresh / 5);
+	zfs_arc_wakeup_pager = vm_pageout_wakeup_thresh + ((vm_cnt.v_free_target - vm_pageout_wakeup_thresh) / 2);
+
 }
 SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY,
     arc_free_target_init, NULL);
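With the new initializer above, the ARC free-page target ends up 20% above the pageout wakeup threshold, and zfs_arc_wakeup_pager sits halfway between that threshold and the VM's overall free target. A minimal userland sketch of the arithmetic, using assumed page counts (the real values are machine-dependent tunables, not taken from this patch):

#include <stdio.h>

int
main(void)
{
	/* Assumed page counts, for illustration only. */
	unsigned int vm_pageout_wakeup_thresh = 10000;
	unsigned int v_free_target = 34000;

	unsigned int zfs_arc_free_target =
	    vm_pageout_wakeup_thresh + (vm_pageout_wakeup_thresh / 5);
	unsigned int zfs_arc_wakeup_pager = vm_pageout_wakeup_thresh +
	    ((v_free_target - vm_pageout_wakeup_thresh) / 2);

	/* With the numbers above this prints 12000 and 22000 pages. */
	printf("free_target=%u wakeup_pager=%u\n",
	    zfs_arc_free_target, zfs_arc_wakeup_pager);
	return (0);
}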
@@ -4225,13 +4236,21 @@ int64_t arc_pages_pp_reserve = 64;
  */
 int64_t arc_swapfs_reserve = 64;
 
+typedef enum free_memory_measure_t {
+	FMM_EXCLUDE_ZONE_CACHE,
+	FMM_INCLUDE_ZONE_CACHE
+} free_memory_measure_t;
+
 /*
  * Return the amount of memory that can be consumed before reclaim will be
  * needed.  Positive if there is sufficient free memory, negative indicates
  * the amount of memory that needs to be freed up.
  */
+
+static int64_t arc_check_uma_cache(int64_t lowest);
+
 static int64_t
-arc_available_memory(void)
+arc_available_memory(free_memory_measure_t zone_measure)
 {
 	int64_t lowest = INT64_MAX;
 	int64_t n;
@@ -4238,7 +4257,16 @@ static int64_t
 	free_memory_reason_t r = FMR_UNKNOWN;
 
 #ifdef _KERNEL
+#ifdef	WAKE_PAGER
+	sbintime_t now;
+	static sbintime_t last_pagedaemon_wake = 0;
+#endif	/* WAKE_PAGER */
+
 	if (needfree > 0) {
+		n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+		needfree = n > 0 ? n : 0;
+	}
+	if (needfree > 0) {
 		n = PAGESIZE * (-needfree);
 		if (n < lowest) {
 			lowest = n;
@@ -4246,11 +4274,41 @@ static int64_t
 		}
 	}
 
+#ifdef	WAKE_PAGER
+/*
+ * Once the ARC is initialized, check whether we are in the VM "warming" zone,
+ * and if so wake the pager -- the intent being to demote inactive pages.
+ */
+	if (arc_init_done) {
+		now = getsbinuptime();
+		if ((now - last_pagedaemon_wake) / SBT_1MS > zfs_arc_wakeup_delay) {
+			last_pagedaemon_wake = now;
+#ifdef	REAP_ARC
+			arc_no_wake_event++;    /* Set bypass flag for ARC */
+#endif
+			if ((((int64_t)freemem - zfs_arc_wakeup_pager) < 0) && (arc_warm == B_TRUE)) {
+#ifdef	REAP_ARC
+				arc_kmem_reap_now(0);   /* Reap caches if we're close */
+#endif
+				DTRACE_PROBE(arc__wake_pagedaemon);
+				(void) pagedaemon_wakeup();    /* Wake the pager */
+#ifdef	REAP_ARC
+			} else {
+				if (((int64_t)freemem - vm_cnt.v_free_target) < 0) {
+					arc_kmem_reap_now(1);   /* Reap one cache if lots of memory */
+					DTRACE_PROBE2(arc__reap_one, int, zfs_arc_last_slab, int, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+				}
+#endif
+			}
+		}
+	}
+
+#endif	/* WAKE_PAGER */
 	/*
 	 * Cooperate with pagedaemon when it's time for it to scan
 	 * and reclaim some pages.
 	 */
-	n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target);
+	n = PAGESIZE * ((int64_t)freemem - (int64_t)zfs_arc_free_target - (int64_t)vm_cnt.v_free_reserved);
 	if (n < lowest) {
 		lowest = n;
 		r = FMR_LOTSFREE;
@@ -4355,6 +4413,16 @@ static int64_t
 		}
 	}
 
+	/* Some memory may be held in UMA zone cache elements; in that case
+	 * the ARC is not under memory pressure and may still grow.
+	 * zone_measure == FMM_INCLUDE_ZONE_CACHE selects this accounting.
+	 */
+	if (lowest < 0 && zone_measure == FMM_INCLUDE_ZONE_CACHE) {
+		lowest = arc_check_uma_cache(lowest);
+		if (lowest >= 0)
+			r = FMR_UNKNOWN;
+	}
+
 #else	/* _KERNEL */
 	/* Every 100 calls, free a small amount */
 	if (spa_get_random(100) == 0)
@@ -4376,7 +4444,7 @@ static int64_t
 static boolean_t
 arc_reclaim_needed(void)
 {
-	return (arc_available_memory() < 0);
+	return (arc_available_memory(FMM_INCLUDE_ZONE_CACHE) < 0);
 }
 
 extern kmem_cache_t	*zio_buf_cache[];
@@ -4436,6 +4504,66 @@ arc_kmem_reap_now(void)
 	DTRACE_PROBE(arc__kmem_reap_end);
 }
 
+int sysctl_drain_cache = 1;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, drain_uma_cache, CTLFLAG_RW, &sysctl_drain_cache, 0, "drain per-CPU UMA cache");
+
+
+#ifdef _KERNEL
+static int64_t
+arc_check_uma_cache(int64_t lowest)
+{
+	int			iter = 4;
+	int			step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+	int			n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+
+	while (n >= 0) {
+		lowest += uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+		if (lowest >= 0)
+			return lowest;
+		n -= step;
+		if (--iter == 0) {
+			if (step > 1) step >>= 1;
+			iter = 4;
+		}
+	}
+	return lowest;
+}
+#endif
+
+static void
+arc_drain_uma_cache(uint64_t target)
+{
+	int			iter = 4;
+	int			step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
+	int			n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;
+	size_t			free_size;
+
+	DTRACE_PROBE2(arc__drain_uma_cache_start, uint64_t, target, uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+#ifdef _KERNEL
+	free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+	if (target <= free_size)
+		return;
+	while (n >= 0) {
+		free_size = uma_zone_get_free_size(zio_data_buf_cache[n]->kc_zone);
+		if (free_size) {
+			if (sysctl_drain_cache)
+				uma_reclaim_zone_cache(zio_data_buf_cache[n]->kc_zone);
+			kmem_cache_reap_now(zio_data_buf_cache[n]);
+			DTRACE_PROBE3(arc__drain_uma_cache_zone, char *, zio_data_buf_cache[n]->kc_name, size_t, free_size, uint64_t, (uint64_t)vm_cnt.v_free_count * PAGESIZE);
+			free_size = (uint64_t)vm_cnt.v_free_count * PAGESIZE;
+			if (target <= free_size)
+				break;
+		}
+		n -= step;
+		if (--iter == 0) {
+			if (step > 1) step >>= 1;
+			iter = 4;
+		}
+	}
+#endif
+	DTRACE_PROBE(arc__drain_uma_cache_end);
+}
+
 /*
  * Threads can block in arc_get_data_impl() waiting for this thread to evict
  * enough data and signal them to proceed. When this happens, the threads in
@@ -4487,7 +4615,8 @@ arc_reclaim_thread(void *dummy __unused)
 		 */
 		evicted = arc_adjust();
 
-		int64_t free_memory = arc_available_memory();
+		int64_t free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
+		DTRACE_PROBE2(arc__reclaim_adj, uint64_t, evicted, int64_t, free_memory);
 		if (free_memory < 0) {
 
 			arc_no_grow = B_TRUE;
@@ -4499,21 +4628,35 @@ arc_reclaim_thread(void *dummy __unused)
 			 */
 			growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
 
+#ifdef _KERNEL
+			if (arc_check_uma_cache(free_memory) >= 0)
+				arc_drain_uma_cache((uint64_t)freemem * PAGESIZE - free_memory);
+#else
 			arc_kmem_reap_now();
-
+#endif
+
 			/*
 			 * If we are still low on memory, shrink the ARC
 			 * so that we have arc_shrink_min free space.
 			 */
-			free_memory = arc_available_memory();
+			free_memory = arc_available_memory(FMM_EXCLUDE_ZONE_CACHE);
 
 			int64_t to_free =
 			    (arc_c >> arc_shrink_shift) - free_memory;
+			DTRACE_PROBE3(arc__reclaim_tst, int64_t, to_free, int64_t, free_memory, long, needfree);
 			if (to_free > 0) {
 #ifdef _KERNEL
 				to_free = MAX(to_free, ptob(needfree));
+				uint64_t free_target =
+				    (uint64_t)freemem * PAGESIZE - free_memory;
 #endif
 				arc_shrink(to_free);
+#ifdef _KERNEL
+				arc_drain_uma_cache(free_target);
+#else
+				arc_kmem_reap_now();
+#endif
+				DTRACE_PROBE(arc__reclaim_shr);
 			}
 		} else if (free_memory < arc_c >> arc_no_grow_shift) {
 			arc_no_grow = B_TRUE;
@@ -6308,20 +6451,14 @@ static eventhandler_tag arc_event_lowmem = NULL;
 static void
 arc_lowmem(void *arg __unused, int howto __unused)
 {
+	int64_t n;
 
 	mutex_enter(&arc_reclaim_lock);
 	/* XXX: Memory deficit should be passed as argument. */
-	needfree = btoc(arc_c >> arc_shrink_shift);
+	n = (int64_t)vm_cnt.v_free_target - (int64_t)vm_cnt.v_free_count;
+	needfree = (n > 0) ? n : vm_cnt.v_free_target >> 8;
 	DTRACE_PROBE(arc__needfree);
 	cv_signal(&arc_reclaim_thread_cv);
-
-	/*
-	 * It is unsafe to block here in arbitrary threads, because we can come
-	 * here from ARC itself and may hold ARC locks and thus risk a deadlock
-	 * with ARC reclaim thread.
-	 */
-	if (curproc == pageproc)
-		(void) cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock);
 	mutex_exit(&arc_reclaim_lock);
 }
 #endif
@@ -6632,6 +6769,9 @@ arc_init(void)
 		printf("             in /boot/loader.conf.\n");
 	}
 #endif
+#ifdef	WAKE_PAGER
+	arc_init_done++;
+#endif
 }
 
 void
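Both new helpers in arc.c, arc_check_uma_cache() and arc_drain_uma_cache(), walk the zio_data_buf_cache[] size classes from the largest index downward, stepping by one eighth of the index range and halving the step every fourth probe, so the largest buffer classes are visited first and the walk refines as the index shrinks. A standalone sketch of that index schedule follows; the SPA shift values are illustrative assumptions, not taken from the patch:

#include <stdio.h>

/* Illustrative values only; the real constants come from the ZFS headers. */
#define	SPA_MINBLOCKSHIFT	9
#define	SPA_MAXBLOCKSHIFT	17
#define	SPA_MAXBLOCKSIZE	(1ULL << SPA_MAXBLOCKSHIFT)

int
main(void)
{
	int iter = 4;
	int step = 1 << (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT - 3);
	int n = (SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT) - 1;

	/* Same walk as arc_check_uma_cache()/arc_drain_uma_cache(). */
	while (n >= 0) {
		printf("probe zio_data_buf_cache[%d]\n", n);
		n -= step;
		if (--iter == 0) {
			if (step > 1)
				step >>= 1;
			iter = 4;
		}
	}
	return (0);
}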
(-)vm/uma.h (+22 lines)
@@ -448,6 +448,16 @@ void uma_startup2(void);
 void uma_reclaim(void);
 
 /*
+ * Reclaims unused per-CPU cache memory from the specified zone
+ *
+ * Arguments:
+ *	zone  The zone for cleanup
+ * Returns:
+ *	None
+ */
+void uma_reclaim_zone_cache(uma_zone_t zone);
+
+/*
  * Sets the alignment mask to be used for all zones requesting cache
  * alignment.  Should be called by MD boot code prior to starting VM/UMA.
  *
@@ -545,6 +555,18 @@ void uma_zone_set_maxaction(uma_zone_t zone, uma_m
 int uma_zone_get_cur(uma_zone_t zone);
 
 /*
+ * Obtains the approximate total size of free items in a zone
+ *
+ * Arguments:
+ *	zone  The zone to obtain the current free size from
+ *
+ * Return:
+ *	size_t  The approximate size, in bytes, of items currently free in the zone
+ */
+size_t uma_zone_get_free_size(uma_zone_t zone);
+
+
+/*
  * The following two routines (uma_zone_set_init/fini)
  * are used to set the backend init/fini pair which acts on an
 * object as it becomes allocated and is placed in a slab within
(-)vm/uma_core.c (+41 lines)
@@ -2987,6 +2987,39 @@ uma_zone_get_cur(uma_zone_t zone)
 }
 
 /* See uma.h */
+size_t
+uma_zone_get_free_size(uma_zone_t zone)
+{
+	uma_klink_t kl;
+	uma_bucket_t bucket;
+	int64_t nitems;
+	u_int i;
+
+	ZONE_LOCK(zone);
+	nitems = 0;
+	if (!(zone->uz_flags & UMA_ZONE_SECONDARY)) {
+		LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
+			nitems += kl->kl_keg->uk_free;
+		}
+	}
+	CPU_FOREACH(i) {
+		/*
+		 * See the comment in sysctl_vm_zone_stats() regarding the
+		 * safety of accessing the per-cpu caches. With the zone lock
+		 * held, it is safe, but can potentially result in stale data.
+		 */
+		bucket = zone->uz_cpu[i].uc_allocbucket;
+		if (bucket != NULL)
+			nitems += bucket->ub_cnt;
+		bucket = zone->uz_cpu[i].uc_freebucket;
+		if (bucket != NULL)
+			nitems += bucket->ub_cnt;
+	}
+	ZONE_UNLOCK(zone);
+	return (nitems * zone->uz_size);
+}
+
+/* See uma.h */
 void
 uma_zone_set_init(uma_zone_t zone, uma_init uminit)
 {
@@ -3152,6 +3185,14 @@ uma_prealloc(uma_zone_t zone, int items)
 }
 
 /* See uma.h */
+void
+uma_reclaim_zone_cache(uma_zone_t zone)
+{
+	bucket_enable();
+	cache_drain_safe(zone);
+}
+
+/* See uma.h */
 static void
 uma_reclaim_locked(bool kmem_danger)
 {
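For reference, the byte count that uma_zone_get_free_size() reports is the number of free items held by the zone's kegs plus any items parked in each CPU's alloc and free buckets, multiplied by the zone's item size. A simplified, self-contained model of that accounting is sketched below; the struct names are hypothetical stand-ins for the kernel's uma_zone/uma_keg/uma_bucket, not the real UMA types:

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's UMA structures. */
struct keg { int uk_free; };			/* free items on slabs */
struct bucket { int ub_cnt; };			/* items cached in a bucket */
struct cpu_cache { struct bucket *uc_allocbucket, *uc_freebucket; };
struct zone {
	size_t uz_size;				/* item size in bytes */
	struct keg *kegs; int nkegs;
	struct cpu_cache *cpu; int ncpu;
};

static size_t
zone_free_size(const struct zone *z)
{
	long nitems = 0;

	for (int i = 0; i < z->nkegs; i++)	/* free slab items */
		nitems += z->kegs[i].uk_free;
	for (int i = 0; i < z->ncpu; i++) {	/* per-CPU bucket items */
		if (z->cpu[i].uc_allocbucket != NULL)
			nitems += z->cpu[i].uc_allocbucket->ub_cnt;
		if (z->cpu[i].uc_freebucket != NULL)
			nitems += z->cpu[i].uc_freebucket->ub_cnt;
	}
	return (nitems * z->uz_size);
}

int
main(void)
{
	struct keg k = { .uk_free = 100 };
	struct bucket a = { .ub_cnt = 8 }, f = { .ub_cnt = 3 };
	struct cpu_cache c[2] = { { &a, &f }, { NULL, NULL } };
	struct zone z = { .uz_size = 4096, .kegs = &k, .nkegs = 1,
	    .cpu = c, .ncpu = 2 };

	/* (100 + 8 + 3) items * 4096 bytes = 454656 bytes of idle cache. */
	printf("%zu\n", zone_free_size(&z));
	return (0);
}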
