| From d4899f4747fd03be748fd1a589b9db5786fa1375 Mon Sep 17 00:00:00 2001 |
| From: Brian Behlendorf <behlendorf1@llnl.gov> |
| Date: Fri, 11 Jan 2013 14:29:32 -0800 |
| Subject: [PATCH] kmem-cache: Fix slab ageing soft lockup |
| |
| Commit a10287e00d13c4c4dbbff14f42b00b03da363fcb slightly reworked |
| the slab ageing code such that it is no longer dependent on the |
| Linux delayed work queue interfaces. |
| |
| This was good for portability and performance, but it requires us |
| to use the on_each_cpu() function to execute the spl_magazine_age() |
| function. That means that the function is now executing in interrupt |
| context whereas before it was scheduled in normal process context. |
| And that means we need to be slightly more careful about the locking |
| in the interrupt handler. |
| |
| With the reworked code it's possible that we'll be holding the |
| skc->skc_lock and be interrupted to handle the spl_magazine_age() |
| IRQ. This will result in a deadlock and soft lockup errors unless |
| we're careful to detect the contention and avoid taking the lock in |
| the interupt handler. So that's what this patch does. |
| |
| Alternately, (and slightly more conventionally) we could have used |
| spin_lock_irqsave() to prevent this race entirely but I'd perfer to |
| avoid disabling interrupts as much as possible due to performance |
| concerns. There is absolutely no penalty for us not aging objects |
| out of the magazine due to contention. |
| |
| Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> |
| Signed-off-by: Prakash Surya <surya1@llnl.gov> |
| Closes zfsonlinux/zfs#1193 |
| --- |
| module/spl/spl-kmem.c | 94 +++++++++++++++++++++++++++---------------------- |
| 1 file changed, 51 insertions(+), 43 deletions(-) |
| |
| diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c |
| index bc08a55..cc5961e 100644 |
| --- a/module/spl/spl-kmem.c |
| +++ b/module/spl/spl-kmem.c |
| @@ -827,8 +827,7 @@ char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) |
| struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ |
| taskq_t *spl_kmem_cache_taskq; /* Task queue for ageing / reclaim */ |
| |
| -static int spl_cache_flush(spl_kmem_cache_t *skc, |
| - spl_kmem_magazine_t *skm, int flush); |
| +static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj); |
| |
| SPL_SHRINKER_CALLBACK_FWD_DECLARE(spl_kmem_cache_generic_shrinker); |
| SPL_SHRINKER_DECLARE(spl_kmem_cache_shrinker, |
| @@ -1244,6 +1243,38 @@ static int spl_cache_flush(spl_kmem_cache_t *skc, |
| SRETURN(0); |
| } |
| |
| +/* |
| + * Release objects from the per-cpu magazine back to their slab. The flush |
| + * argument contains the max number of entries to remove from the magazine. |
| + */ |
| +static void |
| +__spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) |
| +{ |
| + int i, count = MIN(flush, skm->skm_avail); |
| + SENTRY; |
| + |
| + ASSERT(skc->skc_magic == SKC_MAGIC); |
| + ASSERT(skm->skm_magic == SKM_MAGIC); |
| + ASSERT(spin_is_locked(&skc->skc_lock)); |
| + |
| + for (i = 0; i < count; i++) |
| + spl_cache_shrink(skc, skm->skm_objs[i]); |
| + |
| + skm->skm_avail -= count; |
| + memmove(skm->skm_objs, &(skm->skm_objs[count]), |
| + sizeof(void *) * skm->skm_avail); |
| + |
| + SEXIT; |
| +} |
| + |
| +static void |
| +spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) |
| +{ |
| + spin_lock(&skc->skc_lock); |
| + __spl_cache_flush(skc, skm, flush); |
| + spin_unlock(&skc->skc_lock); |
| +} |
| + |
| static void |
| spl_magazine_age(void *data) |
| { |
| @@ -1252,10 +1283,23 @@ static int spl_cache_flush(spl_kmem_cache_t *skc, |
| |
| ASSERT(skm->skm_magic == SKM_MAGIC); |
| ASSERT(skm->skm_cpu == smp_processor_id()); |
| + ASSERT(irqs_disabled()); |
| + |
| + /* There are no available objects or they are too young to age out */ |
| + if ((skm->skm_avail == 0) || |
| + time_before(jiffies, skm->skm_age + skc->skc_delay * HZ)) |
| + return; |
| |
| - if (skm->skm_avail > 0) |
| - if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ)) |
| - (void) spl_cache_flush(skc, skm, skm->skm_refill); |
| + /* |
| + * Because we're executing in interrupt context we may have |
| + * interrupted the holder of this lock. To avoid a potential |
| + * deadlock return if the lock is contended. |
| + */ |
| + if (!spin_trylock(&skc->skc_lock)) |
| + return; |
| + |
| + __spl_cache_flush(skc, skm, skm->skm_refill); |
| + spin_unlock(&skc->skc_lock); |
| } |
| |
| /* |
| @@ -1451,7 +1495,7 @@ static int spl_cache_flush(spl_kmem_cache_t *skc, |
| |
| for_each_online_cpu(i) { |
| skm = skc->skc_mag[i]; |
| - (void)spl_cache_flush(skc, skm, skm->skm_avail); |
| + spl_cache_flush(skc, skm, skm->skm_avail); |
| spl_magazine_free(skm); |
| } |
| |
| @@ -1932,42 +1976,6 @@ static int spl_cache_flush(spl_kmem_cache_t *skc, |
| } |
| |
| /* |
| - * Release a batch of objects from a per-cpu magazine back to their |
| - * respective slabs. This occurs when we exceed the magazine size, |
| - * are under memory pressure, when the cache is idle, or during |
| - * cache cleanup. The flush argument contains the number of entries |
| - * to remove from the magazine. |
| - */ |
| -static int |
| -spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) |
| -{ |
| - int i, count = MIN(flush, skm->skm_avail); |
| - SENTRY; |
| - |
| - ASSERT(skc->skc_magic == SKC_MAGIC); |
| - ASSERT(skm->skm_magic == SKM_MAGIC); |
| - |
| - /* |
| - * XXX: Currently we simply return objects from the magazine to |
| - * the slabs in fifo order. The ideal thing to do from a memory |
| - * fragmentation standpoint is to cheaply determine the set of |
| - * objects in the magazine which will result in the largest |
| - * number of free slabs if released from the magazine. |
| - */ |
| - spin_lock(&skc->skc_lock); |
| - for (i = 0; i < count; i++) |
| - spl_cache_shrink(skc, skm->skm_objs[i]); |
| - |
| - skm->skm_avail -= count; |
| - memmove(skm->skm_objs, &(skm->skm_objs[count]), |
| - sizeof(void *) * skm->skm_avail); |
| - |
| - spin_unlock(&skc->skc_lock); |
| - |
| - SRETURN(count); |
| -} |
| - |
| -/* |
| * Allocate an object from the per-cpu magazine, or if the magazine |
| * is empty directly allocate from a slab and repopulate the magazine. |
| */ |
| @@ -2053,7 +2061,7 @@ static int spl_cache_flush(spl_kmem_cache_t *skc, |
| |
| /* Per-CPU cache full, flush it to make space */ |
| if (unlikely(skm->skm_avail >= skm->skm_size)) |
| - (void)spl_cache_flush(skc, skm, skm->skm_refill); |
| + spl_cache_flush(skc, skm, skm->skm_refill); |
| |
| /* Available space in cache, use it */ |
| skm->skm_objs[skm->skm_avail++] = obj; |
| -- |
| 1.7.10 |
| |