diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h
index b6af255376d..485340083c4 100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@ -359,6 +359,7 @@ struct objcore {
 	VTAILQ_ENTRY(objcore)	lru_list;
 	VTAILQ_ENTRY(objcore)	ban_list;
 	VSTAILQ_ENTRY(objcore)	exp_list;
+	VTAILQ_HEAD(, req)	waitinglist;
 	struct ban		*ban;
 };
 
@@ -490,8 +491,8 @@ struct req {
 
 	struct objcore		*body_oc;
 
-	/* The busy objhead we sleep on */
-	struct objhead		*hash_objhead;
+	/* The busy objcore we sleep on */
+	struct objcore		*hash_oc;
 
 	/* Built Vary string == workspace reservation */
 	uint8_t			*vary_b;
diff --git a/bin/varnishd/cache/cache_ban_lurker.c b/bin/varnishd/cache/cache_ban_lurker.c
index 76ac772b9f7..311ec31d7fe 100644
--- a/bin/varnishd/cache/cache_ban_lurker.c
+++ b/bin/varnishd/cache/cache_ban_lurker.c
@@ -332,7 +332,7 @@ ban_lurker_test_ban(struct worker *wrk, struct vsl_log *vsl, struct ban *bt,
 			if (i)
 				ObjSendEvent(wrk, oc, OEV_BANCHG);
 		}
-		(void)HSH_DerefObjCore(wrk, &oc, 0);
+		(void)HSH_DerefObjCore(wrk, &oc);
 	}
 }
 
diff --git a/bin/varnishd/cache/cache_busyobj.c b/bin/varnishd/cache/cache_busyobj.c
index 9a5fe36d11a..17ef9b26738 100644
--- a/bin/varnishd/cache/cache_busyobj.c
+++ b/bin/varnishd/cache/cache_busyobj.c
@@ -175,10 +175,8 @@ VBO_ReleaseBusyObj(struct worker *wrk, struct busyobj **pbo)
 	if (WS_Overflowed(bo->ws))
 		wrk->stats->ws_backend_overflow++;
 
-	if (bo->fetch_objcore != NULL) {
-		(void)HSH_DerefObjCore(wrk, &bo->fetch_objcore,
-		    HSH_RUSH_POLICY);
-	}
+	if (bo->fetch_objcore != NULL)
+		(void)HSH_DerefObjCore(wrk, &bo->fetch_objcore);
 
 	VRT_Assign_Backend(&bo->director_req, NULL);
 	VRT_Assign_Backend(&bo->director_resp, NULL);
diff --git a/bin/varnishd/cache/cache_expire.c b/bin/varnishd/cache/cache_expire.c
index 0392d191d09..f1122ec34df 100644
--- a/bin/varnishd/cache/cache_expire.c
+++ b/bin/varnishd/cache/cache_expire.c
@@ -209,7 +209,7 @@ EXP_Insert(struct worker *wrk, struct objcore *oc)
 	ObjSendEvent(wrk, oc, OEV_EXPIRE);
 	tmpoc = oc;
 	assert(oc->refcnt >= 2); /* Silence coverity */
-	(void)HSH_DerefObjCore(wrk, &oc, 0);
+	(void)HSH_DerefObjCore(wrk, &oc);
 	AZ(oc);
 	assert(tmpoc->refcnt >= 1); /* Silence coverity */
 }
@@ -280,7 +280,7 @@ exp_inbox(struct exp_priv *ep, struct objcore *oc, unsigned flags)
 		assert(oc->refcnt > 0);
 		AZ(oc->exp_flags);
 		ObjSendEvent(ep->wrk, oc, OEV_EXPIRE);
-		(void)HSH_DerefObjCore(ep->wrk, &oc, 0);
+		(void)HSH_DerefObjCore(ep->wrk, &oc);
 		return;
 	}
 
@@ -357,7 +357,7 @@ exp_expire(struct exp_priv *ep, vtim_real now)
 		VSLb(&ep->vsl, SLT_ExpKill, "EXP_Expired xid=%ju t=%.0f",
 		    VXID(ObjGetXID(ep->wrk, oc)), EXP_Ttl(NULL, oc) - now);
 		ObjSendEvent(ep->wrk, oc, OEV_EXPIRE);
-		(void)HSH_DerefObjCore(ep->wrk, &oc, 0);
+		(void)HSH_DerefObjCore(ep->wrk, &oc);
 	}
 	return (0);
 }
diff --git a/bin/varnishd/cache/cache_fetch.c b/bin/varnishd/cache/cache_fetch.c
index 027b617f571..ebe5b0b73c7 100644
--- a/bin/varnishd/cache/cache_fetch.c
+++ b/bin/varnishd/cache/cache_fetch.c
@@ -34,7 +34,6 @@
 #include "cache_varnishd.h"
 #include "cache_filter.h"
 #include "cache_objhead.h"
-#include "hash/hash_slinger.h"
 #include "storage/storage.h"
 #include "vcl.h"
 #include "vtim.h"
@@ -923,7 +922,7 @@ vbf_stp_error(struct worker *wrk, struct busyobj *bo)
 	stale = bo->stale_oc;
 	oc->t_origin = now;
 
-	if (!VTAILQ_EMPTY(&oc->objhead->waitinglist)) {
+	if (!VTAILQ_EMPTY(&oc->waitinglist)) {
 		/*
 		 * If there is a waitinglist, it means that there is no
 		 * grace-able object, so cache the error return for a
@@ -1059,7 +1058,7 @@ vbf_fetch_thread(struct worker *wrk, void *priv)
 		CHECK_OBJ_NOTNULL(bo->stale_oc, OBJCORE_MAGIC);
 		/* We don't want the oc/stevedore ops in fetching thread */
 		if (!ObjCheckFlag(wrk, bo->stale_oc, OF_IMSCAND))
-			(void)HSH_DerefObjCore(wrk, &bo->stale_oc, 0);
+			(void)HSH_DerefObjCore(wrk, &bo->stale_oc);
 	}
 #endif
 
@@ -1086,7 +1085,7 @@ vbf_fetch_thread(struct worker *wrk, void *priv)
 	http_Teardown(bo->beresp);
 	// can not make assumptions about the number of references here #3434
 	if (bo->bereq_body != NULL)
-		(void) HSH_DerefObjCore(bo->wrk, &bo->bereq_body, 0);
+		(void) HSH_DerefObjCore(bo->wrk, &bo->bereq_body);
 
 	if (oc->boc->state == BOS_FINISHED) {
 		AZ(oc->flags & OC_F_FAILED);
@@ -1096,7 +1095,7 @@ vbf_fetch_thread(struct worker *wrk, void *priv)
 
 	// AZ(oc->boc);  // XXX
 	if (bo->stale_oc != NULL)
-		(void)HSH_DerefObjCore(wrk, &bo->stale_oc, 0);
+		(void)HSH_DerefObjCore(wrk, &bo->stale_oc);
 
 	wrk->vsl = NULL;
 	HSH_DerefBoc(wrk, oc);
@@ -1185,7 +1184,7 @@ VBF_Fetch(struct worker *wrk, struct req *req, struct objcore *oc,
 		wrk->stats->bgfetch_no_thread++;
 		(void)vbf_stp_fail(req->wrk, bo);
 		if (bo->stale_oc != NULL)
-			(void)HSH_DerefObjCore(wrk, &bo->stale_oc, 0);
+			(void)HSH_DerefObjCore(wrk, &bo->stale_oc);
 		HSH_DerefBoc(wrk, oc);
 		SES_Rel(bo->sp);
 		THR_SetBusyobj(NULL);
@@ -1210,5 +1209,5 @@ VBF_Fetch(struct worker *wrk, struct req *req, struct objcore *oc,
 	assert(oc->boc == boc);
 	HSH_DerefBoc(wrk, oc);
 	if (mode == VBF_BACKGROUND)
-		(void)HSH_DerefObjCore(wrk, &oc, HSH_RUSH_POLICY);
+		(void)HSH_DerefObjCore(wrk, &oc);
 }
diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c
index 4b379988828..0d729288bfe 100644
--- a/bin/varnishd/cache/cache_hash.c
+++ b/bin/varnishd/cache/cache_hash.c
@@ -75,12 +75,8 @@ struct rush {
 static const struct hash_slinger *hash;
 static struct objhead *private_oh;
 
-static void hsh_rush1(const struct worker *, struct objhead *,
-    struct rush *, int);
-static void hsh_rush2(struct worker *, struct rush *);
 static int hsh_deref_objhead(struct worker *wrk, struct objhead **poh);
-static int hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh,
-    int);
+static int hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh);
 
 /*---------------------------------------------------------------------*/
 
@@ -102,7 +98,6 @@ hsh_newobjhead(void)
 	XXXAN(oh);
 	oh->refcnt = 1;
 	VTAILQ_INIT(&oh->objcs);
-	VTAILQ_INIT(&oh->waitinglist);
 	Lck_New(&oh->mtx, lck_objhdr);
 	return (oh);
 }
@@ -182,7 +177,6 @@ HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh)
 
 	AZ(oh->refcnt);
 	assert(VTAILQ_EMPTY(&oh->objcs));
-	assert(VTAILQ_EMPTY(&oh->waitinglist));
 	Lck_Delete(&oh->mtx);
 	wrk->stats->n_objecthead--;
 	FREE_OBJ(oh);
@@ -287,17 +281,16 @@ HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc,
     struct ban *ban)
 {
 	struct objhead *oh;
-	struct rush rush;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
 	AN(digest);
 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
 	AN(ban);
+	assert(VTAILQ_EMPTY(&oc->waitinglist));
 	AN(oc->flags & OC_F_BUSY);
 	AZ(oc->flags & OC_F_PRIVATE);
 	assert(oc->refcnt == 1);
-	INIT_OBJ(&rush, RUSH_MAGIC);
 
 	hsh_prealloc(wrk);
 
@@ -323,10 +316,7 @@ HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc,
 	VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
 	VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
 	oc->flags &= ~OC_F_BUSY;
-	if (!VTAILQ_EMPTY(&oh->waitinglist))
-		hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
 	Lck_Unlock(&oh->mtx);
-	hsh_rush2(wrk, &rush);
 
 	EXP_Insert(wrk, oc);
 }
@@ -356,68 +346,179 @@ hsh_insert_busyobj(const struct worker *wrk, struct objhead *oh)
 }
 
 /*---------------------------------------------------------------------
+ * Pick the req's we are going to rush from the waiting list
  */
 
-enum lookup_e
-HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
+static void
+hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r)
+{
+	int max, i;
+	unsigned xid = 0;
+	struct req *req;
+
+	AZ(oc->flags & OC_F_BUSY);
+	max = oc->flags != 0 ? cache_param->rush_exponent : INT_MAX;
+	assert(max > 0);
+
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
+	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+	CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
+	VTAILQ_INIT(&r->reqs);
+	Lck_AssertHeld(&oc->objhead->mtx);
+	for (i = 0; i < max; i++) {
+		req = VTAILQ_FIRST(&oc->waitinglist);
+		if (req == NULL)
+			break;
+
+		if (DO_DEBUG(DBG_WAITINGLIST)) {
+			xid = VXID(req->vsl->wid);
+			VSLb(wrk->vsl, SLT_Debug,
+			    "waiting list rush for req %u", xid);
+		}
+
+		assert(oc->refcnt > 0);
+		oc->refcnt++;
+		CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
+		wrk->stats->busy_wakeup++;
+		AZ(req->wrk);
+		VTAILQ_REMOVE(&oc->waitinglist, req, w_list);
+		VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
+		req->waitinglist = 0;
+	}
+}
+
+/*---------------------------------------------------------------------
+ * Rush req's that came from waiting list.
+ */
+
+static void
+hsh_rush2(struct worker *wrk, struct rush *r)
+{
+	struct req *req;
+
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
+	CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
+
+	while (!VTAILQ_EMPTY(&r->reqs)) {
+		req = VTAILQ_FIRST(&r->reqs);
+		CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
+		VTAILQ_REMOVE(&r->reqs, req, w_list);
+		DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list");
+		if (req->transport->reembark != NULL) {
+			// For ESI includes
+			req->transport->reembark(wrk, req);
+		} else {
+			/*
+			 * We ignore the queue limits which apply to new
+			 * requests because if we fail to reschedule there
+			 * may be vmod_privs to cleanup and we need a proper
+			 * workerthread for that.
+			 */
+			AZ(Pool_Task(req->sp->pool, req->task, TASK_QUEUE_RUSH));
+		}
+	}
+}
+
+/*---------------------------------------------------------------------
+ */
+
+static unsigned
+hsh_rush_match(struct req *req)
+{
+	struct objcore *oc;
+	const uint8_t *vary;
+
+	oc = req->hash_oc;
+	CHECK_OBJ_ORNULL(oc, OBJCORE_MAGIC);
+
+	if (oc == NULL)
+		return (0);
+
+	AZ(oc->flags & OC_F_BUSY);
+	if (oc->flags != 0)
+		return (0);
+
+	/* The objcore was serviceable, so all the requests on the
+	 * waiting list were rushed at once.
+	 */
+	assert(VTAILQ_EMPTY(&oc->waitinglist));
+
+	if (req->hash_ignore_vary)
+		return (1);
+	if (!ObjHasAttr(req->wrk, oc, OA_VARY))
+		return (1);
+
+	vary = ObjGetAttr(req->wrk, oc, OA_VARY, NULL);
+	AN(vary);
+	return (VRY_Match(req, vary));
+}
+
+static void
+hsh_rush_move(struct objcore *new_oc, struct objcore *old_oc)
+{
+	struct req *req;
+
+	CHECK_OBJ_NOTNULL(new_oc, OBJCORE_MAGIC);
+	CHECK_OBJ_ORNULL(old_oc, OBJCORE_MAGIC);
+
+	if (old_oc == NULL || VTAILQ_EMPTY(&old_oc->waitinglist))
+		return;
+
+	assert(old_oc->objhead == new_oc->objhead);
+	assert(old_oc->refcnt > 0);
+	assert(new_oc->refcnt > 0);
+	Lck_AssertHeld(&old_oc->objhead->mtx);
+
+	/* NB: req holds a weak reference to its hash_oc, so no reference
+	 * counting is needed when moving to the new_oc. An actual old_oc
+	 * reference should be held by either the fetch task rushing its
+	 * waiting list at unbusy time, or a rushed request exponentially
+	 * rushing other requests from the waiting list.
+	 */
+	VTAILQ_FOREACH(req, &old_oc->waitinglist, w_list) {
+		CHECK_OBJ(req, REQ_MAGIC);
+		assert(req->hash_oc == old_oc);
+		req->hash_oc = new_oc;
+	}
+
+	/* NB: The double concatenation of lists allows requests that were
+	 * waiting for the old_oc to show up first in the waiting list of
+	 * the new_oc.
+	 */
+	VTAILQ_CONCAT(&old_oc->waitinglist, &new_oc->waitinglist, w_list);
+	VTAILQ_CONCAT(&new_oc->waitinglist, &old_oc->waitinglist, w_list);
+}
+
+/*---------------------------------------------------------------------
+ */
+
+static enum lookup_e
+hsh_objhead_lookup(struct objhead *oh, struct req *req, struct objcore **ocp,
+    struct objcore **bocp)
 {
 	struct worker *wrk;
-	struct objhead *oh;
 	struct objcore *oc;
 	struct objcore *exp_oc;
+	struct objcore *busy_oc;
 	const struct vcf_return *vr;
 	vtim_real exp_t_origin;
-	int busy_found;
 	const uint8_t *vary;
-	intmax_t boc_progress;
-	unsigned xid = 0;
-	float dttl = 0.0;
 
+	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
+	Lck_AssertHeld(&oh->mtx);
+	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
+	wrk = req->wrk;
+	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	AN(ocp);
 	*ocp = NULL;
 	AN(bocp);
 	*bocp = NULL;
 
-	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
-	wrk = req->wrk;
-	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
-	CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
-	CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC);
-	AN(hash);
-
-	hsh_prealloc(wrk);
-	if (DO_DEBUG(DBG_HASHEDGE))
-		hsh_testmagic(req->digest);
-
-	if (req->hash_objhead != NULL) {
-		/*
-		 * This req came off the waiting list, and brings an
-		 * oh refcnt with it.
-		 */
-		CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC);
-		oh = req->hash_objhead;
-		Lck_Lock(&oh->mtx);
-		req->hash_objhead = NULL;
-	} else {
-		AN(wrk->wpriv->nobjhead);
-		oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead);
-	}
-
-	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
-	Lck_AssertHeld(&oh->mtx);
-
-	if (req->hash_always_miss) {
-		/* XXX: should we do predictive Vary in this case ? */
-		/* Insert new objcore in objecthead and release mutex */
-		*bocp = hsh_insert_busyobj(wrk, oh);
-		/* NB: no deref of objhead, new object inherits reference */
-		Lck_Unlock(&oh->mtx);
+	if (req->hash_always_miss)
 		return (HSH_MISS);
-	}
 
 	assert(oh->refcnt > 0);
-	busy_found = 0;
+	busy_oc = NULL;
 	exp_oc = NULL;
 	exp_t_origin = 0.0;
 	VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
@@ -434,17 +535,19 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
 		CHECK_OBJ_ORNULL(oc->boc, BOC_MAGIC);
 
 		if (oc->flags & OC_F_BUSY) {
+			if (busy_oc != NULL)
+				continue;
 			if (req->hash_ignore_busy)
 				continue;
 
-			if (oc->boc && oc->boc->vary != NULL &&
+			if (oc->boc != NULL && oc->boc->vary != NULL &&
 			    !req->hash_ignore_vary &&
 			    !VRY_Match(req, oc->boc->vary)) {
 				wrk->strangelove++;
 				continue;
 			}
 
-			busy_found = 1;
+			busy_oc = oc;
 			continue;
 		}
 
@@ -499,30 +602,15 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
 		(void)req->vcf->func(req, &oc, &exp_oc, 1);
 
 	if (oc != NULL && oc->flags & OC_F_HFP) {
-		xid = VXID(ObjGetXID(wrk, oc));
-		dttl = EXP_Dttl(req, oc);
-		AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY));
-		wrk->stats->cache_hitpass++;
-		VSLb(req->vsl, SLT_HitPass, "%u %.6f", xid, dttl);
+		*ocp = oc;
 		return (HSH_HITPASS);
 	}
 
 	if (oc != NULL) {
 		*ocp = oc;
 		oc->refcnt++;
-		if (oc->flags & OC_F_HFM) {
-			xid = VXID(ObjGetXID(wrk, oc));
-			dttl = EXP_Dttl(req, oc);
-			*bocp = hsh_insert_busyobj(wrk, oh);
-			Lck_Unlock(&oh->mtx);
-			wrk->stats->cache_hitmiss++;
-			VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl);
+		if (oc->flags & OC_F_HFM)
 			return (HSH_HITMISS);
-		}
-		oc->hits++;
-		boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far;
-		AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY));
-		Req_LogHit(wrk, req, oc, boc_progress);
 		return (HSH_HIT);
 	}
 
@@ -531,137 +619,225 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
 		 * expired HFM ("grace/keep HFM")
 		 *
 		 * XXX should HFM objects actually have grace/keep ?
-		 * XXX also: why isn't *ocp = exp_oc ?
 		 */
-		xid = VXID(ObjGetXID(wrk, exp_oc));
-		dttl = EXP_Dttl(req, exp_oc);
-		*bocp = hsh_insert_busyobj(wrk, oh);
-		Lck_Unlock(&oh->mtx);
-		wrk->stats->cache_hitmiss++;
-		VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl);
-		return (HSH_HITMISS);
+		*ocp = exp_oc;
+		return (HSH_HITMISS_EXP);
 	}
 
-	if (exp_oc != NULL && exp_oc->boc != NULL)
-		boc_progress = exp_oc->boc->fetched_so_far;
-	else
-		boc_progress = -1;
-
-	if (!busy_found) {
-		*bocp = hsh_insert_busyobj(wrk, oh);
-
-		if (exp_oc != NULL) {
-			exp_oc->refcnt++;
-			*ocp = exp_oc;
-			if (EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
-				exp_oc->hits++;
-				Lck_Unlock(&oh->mtx);
-				Req_LogHit(wrk, req, exp_oc, boc_progress);
-				return (HSH_GRACE);
-			}
-		}
-		Lck_Unlock(&oh->mtx);
-		return (HSH_MISS);
+	if (busy_oc == NULL) {
+		if (exp_oc == NULL)
+			return (HSH_MISS);
+		*ocp = exp_oc;
+		exp_oc->refcnt++;
+		if (EXP_Ttl_grace(req, exp_oc) >= req->t_req)
+			return (HSH_GRACE);
+		return (HSH_MISS_EXP);
 	}
 
-	AN(busy_found);
+	CHECK_OBJ_NOTNULL(busy_oc, OBJCORE_MAGIC);
+	*bocp = busy_oc;
+
 	if (exp_oc != NULL && EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
 		/* we do not wait on the busy object if in grace */
-		exp_oc->refcnt++;
 		*ocp = exp_oc;
-		exp_oc->hits++;
-		AN(hsh_deref_objhead_unlock(wrk, &oh, 0));
-		Req_LogHit(wrk, req, exp_oc, boc_progress);
+		exp_oc->refcnt++;
 		return (HSH_GRACE);
 	}
 
-	/* There are one or more busy objects, wait for them */
-	VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list);
-
-	AZ(req->hash_ignore_busy);
-
-	/*
-	 * The objhead reference transfers to the sess, we get it
-	 * back when the sess comes off the waiting list and
-	 * calls us again
-	 */
-	req->hash_objhead = oh;
-	req->wrk = NULL;
-	req->waitinglist = 1;
-
-	if (DO_DEBUG(DBG_WAITINGLIST))
-		VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh);
-
-	Lck_Unlock(&oh->mtx);
-
-	wrk->stats->busy_sleep++;
 	return (HSH_BUSY);
 }
 
 /*---------------------------------------------------------------------
- * Pick the req's we are going to rush from the waiting list
  */
 
-static void
-hsh_rush1(const struct worker *wrk, struct objhead *oh, struct rush *r, int max)
+enum lookup_e
+HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
 {
-	int i;
-	struct req *req;
+	enum lookup_e lr;
+	struct worker *wrk;
+	struct objhead *oh;
+	struct objcore *oc;
+	struct objcore *busy_oc;
+	struct objcore *rush_oc;
+	struct rush rush;
+	intmax_t boc_progress;
+	unsigned xid = 0;
+	float dttl = 0.0;
 
-	if (max == 0)
-		return;
-	if (max == HSH_RUSH_POLICY)
-		max = cache_param->rush_exponent;
-	assert(max > 0);
+	AN(ocp);
+	*ocp = NULL;
+	AN(bocp);
+	*bocp = NULL;
 
+	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
+	wrk = req->wrk;
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
-	CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
-	VTAILQ_INIT(&r->reqs);
-	Lck_AssertHeld(&oh->mtx);
-	for (i = 0; i < max; i++) {
-		req = VTAILQ_FIRST(&oh->waitinglist);
-		if (req == NULL)
-			break;
-		CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
-		wrk->stats->busy_wakeup++;
-		AZ(req->wrk);
-		VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
-		VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
-		req->waitinglist = 0;
+	CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
+	CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
+	CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC);
+	AN(hash);
+
+	hsh_prealloc(wrk);
+	if (DO_DEBUG(DBG_HASHEDGE))
+		hsh_testmagic(req->digest);
+
+	if (hsh_rush_match(req)) {
+		TAKE_OBJ_NOTNULL(oc, &req->hash_oc, OBJCORE_MAGIC);
+		*ocp = oc;
+		oc->hits++;
+		boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far;
+		Req_LogHit(wrk, req, oc, boc_progress);
+		/* NB: since this hit comes from the waiting list instead of
+		 * a regular lookup, grace is not considered. The object is
+		 * fresh in the context of the waiting list, even expired: it
+		 * was just successfully [re]validated by a fetch task.
+		 */
+		return (HSH_HIT);
 	}
-}
 
-/*---------------------------------------------------------------------
- * Rush req's that came from waiting list.
- */
+	if (req->hash_oc != NULL) {
+		/*
+		 * This req came off the waiting list, and brings an
+		 * incompatible hash_oc refcnt with it.
+		 */
+		TAKE_OBJ_NOTNULL(rush_oc, &req->hash_oc, OBJCORE_MAGIC);
+		oh = rush_oc->objhead;
+		Lck_Lock(&oh->mtx);
+	} else {
+		AN(wrk->wpriv->nobjhead);
+		oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead);
+		rush_oc = NULL;
+	}
 
-static void
-hsh_rush2(struct worker *wrk, struct rush *r)
-{
-	struct req *req;
+	lr = hsh_objhead_lookup(oh, req, &oc, &busy_oc);
 
-	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
+	INIT_OBJ(&rush, RUSH_MAGIC);
 
-	while (!VTAILQ_EMPTY(&r->reqs)) {
-		req = VTAILQ_FIRST(&r->reqs);
-		CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
-		VTAILQ_REMOVE(&r->reqs, req, w_list);
-		DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list");
-		if (req->transport->reembark != NULL) {
-			// For ESI includes
-			req->transport->reembark(wrk, req);
-		} else {
-			/*
-			 * We ignore the queue limits which apply to new
-			 * requests because if we fail to reschedule there
-			 * may be vmod_privs to cleanup and we need a proper
-			 * workerthread for that.
+	if (rush_oc != NULL && !VTAILQ_EMPTY(&rush_oc->waitinglist)) {
+		switch (lr) {
+		case HSH_HIT:
+		case HSH_GRACE:
+			CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+			AZ(oc->flags);
+			hsh_rush_move(oc, rush_oc);
+			hsh_rush1(wrk, oc, &rush);
+			assert(VTAILQ_EMPTY(&oc->waitinglist));
+			break;
+		case HSH_BUSY:
+			CHECK_OBJ_NOTNULL(busy_oc, OBJCORE_MAGIC);
+			hsh_rush_move(busy_oc, rush_oc);
+			break;
+		default:
+			/* The remaining stragglers will be passed on to the
+			 * next busy object or woken up as per rush_exponent
+			 * when the rush_oc reference is dropped.
 			 */
-			AZ(Pool_Task(req->sp->pool, req->task, TASK_QUEUE_RUSH));
+			break;
 		}
+	}
+
+	switch (lr) {
+	case HSH_MISS:
+	case HSH_MISS_EXP:
+		if (lr == HSH_MISS)
+			AZ(oc);
+		else
+			CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+		AZ(busy_oc);
+		*bocp = hsh_insert_busyobj(wrk, oh);
+		hsh_rush_move(*bocp, rush_oc);
+		Lck_Unlock(&oh->mtx);
+		break;
+	case HSH_HITMISS:
+	case HSH_HITMISS_EXP:
+		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+		AZ(busy_oc);
+		xid = VXID(ObjGetXID(wrk, oc));
+		dttl = EXP_Dttl(req, oc);
+		*bocp = hsh_insert_busyobj(wrk, oh);
+		hsh_rush_move(*bocp, rush_oc);
+		Lck_Unlock(&oh->mtx);
+		wrk->stats->cache_hitmiss++;
+		VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl);
+		if (lr == HSH_HITMISS_EXP)
+			oc = NULL;
+		break;
+	case HSH_HITPASS:
+		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+		AZ(busy_oc);
+		xid = VXID(ObjGetXID(wrk, oc));
+		dttl = EXP_Dttl(req, oc);
+		AN(hsh_deref_objhead_unlock(wrk, &oh));
+		wrk->stats->cache_hitpass++;
+		VSLb(req->vsl, SLT_HitPass, "%u %.6f", xid, dttl);
+		oc = NULL;
+		break;
+	case HSH_HIT:
+		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+		AZ(busy_oc);
+		oc->hits++;
+		boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far;
+		AN(hsh_deref_objhead_unlock(wrk, &oh));
+		Req_LogHit(wrk, req, oc, boc_progress);
+		break;
+	case HSH_GRACE:
+		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
+		CHECK_OBJ_ORNULL(busy_oc, OBJCORE_MAGIC);
+		oc->hits++;
+		boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far;
+		if (busy_oc == NULL) {
+			*bocp = hsh_insert_busyobj(wrk, oh);
+			Lck_Unlock(&oh->mtx);
+		} else {
+			/* we do not wait on the busy object if in grace */
+			AN(hsh_deref_objhead_unlock(wrk, &oh));
+			busy_oc = NULL;
+		}
+		Req_LogHit(wrk, req, oc, boc_progress);
+		break;
+	case HSH_BUSY:
+		AZ(oc);
+		CHECK_OBJ_NOTNULL(busy_oc, OBJCORE_MAGIC);
+		AZ(req->hash_ignore_busy);
+
+		if (rush_oc != NULL)
+			VTAILQ_INSERT_HEAD(&busy_oc->waitinglist, req, w_list);
+		else
+			VTAILQ_INSERT_TAIL(&busy_oc->waitinglist, req, w_list);
+
+		/*
+		 * The objcore reference transfers to the req, we get it
+		 * back when the req comes off the waiting list and calls
+		 * us again. The reference counter increase is deferred to
+		 * the rush hour.
+		 */
+		assert(busy_oc->refcnt > 0);
+		AZ(req->hash_oc);
+		req->hash_oc = busy_oc;
+		req->wrk = NULL;
+		req->waitinglist = 1;
+
+		if (DO_DEBUG(DBG_WAITINGLIST))
+			VSLb(req->vsl, SLT_Debug,
+			    "on waiting list <%p>", busy_oc);
+
+		Lck_Unlock(&oh->mtx);
+
+		wrk->stats->busy_sleep++;
+		break;
+	default:
+		WRONG("Invalid lookup result");
+	}
+
+	if (oc != NULL)
+		*ocp = oc;
+
+	if (rush_oc != NULL) {
+		hsh_rush2(wrk, &rush);
+		(void)HSH_DerefObjCore(wrk, &rush_oc);
 	}
+
+	return (lr);
 }
 
 /*---------------------------------------------------------------------
@@ -741,7 +917,7 @@ HSH_Purge(struct worker *wrk, struct objhead *oh, vtim_real ttl_now,
 		for (i = 0; i < j; i++) {
 			CHECK_OBJ_NOTNULL(ocp[i], OBJCORE_MAGIC);
 			EXP_Rearm(ocp[i], ttl_now, ttl, grace, keep);
-			(void)HSH_DerefObjCore(wrk, &ocp[i], 0);
+			(void)HSH_DerefObjCore(wrk, &ocp[i]);
 			AZ(ocp[i]);
 			total++;
 		}
@@ -887,10 +1063,7 @@ HSH_Unbusy(struct worker *wrk, struct objcore *oc)
 	VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
 	VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
 	oc->flags &= ~OC_F_BUSY;
-	if (!VTAILQ_EMPTY(&oh->waitinglist)) {
-		assert(oh->refcnt > 1);
-		hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
-	}
+	hsh_rush1(wrk, oc, &rush);
 	Lck_Unlock(&oh->mtx);
 	EXP_Insert(wrk, oc); /* Does nothing unless EXP_RefNewObjcore was
 			      * called */
@@ -1021,7 +1194,7 @@ HSH_DerefBoc(struct worker *wrk, struct objcore *oc)
  */
 
 int
-HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax)
+HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp)
 {
 	struct objcore *oc;
 	struct objhead *oh;
@@ -1041,9 +1214,9 @@ HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax)
 	r = --oc->refcnt;
 	if (!r)
 		VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
-	if (!VTAILQ_EMPTY(&oh->waitinglist)) {
-		assert(oh->refcnt > 1);
-		hsh_rush1(wrk, oh, &rush, rushmax);
+	if (!VTAILQ_EMPTY(&oc->waitinglist)) {
+		assert(oc->refcnt > 0);
+		hsh_rush1(wrk, oc, &rush);
 	}
 	Lck_Unlock(&oh->mtx);
 	hsh_rush2(wrk, &rush);
@@ -1066,11 +1239,9 @@ HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax)
 }
 
 static int
-hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh, int max)
+hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh)
 {
 	struct objhead *oh;
-	struct rush rush;
-	int r;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);
@@ -1078,26 +1249,14 @@ hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh, int max)
 	Lck_AssertHeld(&oh->mtx);
 
 	if (oh == private_oh) {
-		assert(VTAILQ_EMPTY(&oh->waitinglist));
 		assert(oh->refcnt > 1);
 		oh->refcnt--;
 		Lck_Unlock(&oh->mtx);
 		return (1);
 	}
 
-	INIT_OBJ(&rush, RUSH_MAGIC);
-	if (!VTAILQ_EMPTY(&oh->waitinglist)) {
-		assert(oh->refcnt > 1);
-		hsh_rush1(wrk, oh, &rush, max);
-	}
-
-	if (oh->refcnt == 1)
-		assert(VTAILQ_EMPTY(&oh->waitinglist));
-
 	assert(oh->refcnt > 0);
-	r = hash->deref(wrk, oh);	/* Unlocks oh->mtx */
-	hsh_rush2(wrk, &rush);
-	return (r);
+	return (hash->deref(wrk, oh));	/* Unlocks oh->mtx */
 }
 
 static int
@@ -1109,7 +1268,7 @@ hsh_deref_objhead(struct worker *wrk, struct objhead **poh)
 	TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);
 
 	Lck_Lock(&oh->mtx);
-	return (hsh_deref_objhead_unlock(wrk, &oh, 0));
+	return (hsh_deref_objhead_unlock(wrk, &oh));
 }
 
 void
diff --git a/bin/varnishd/cache/cache_obj.c b/bin/varnishd/cache/cache_obj.c
index d92fc1f0d37..43d854904da 100644
--- a/bin/varnishd/cache/cache_obj.c
+++ b/bin/varnishd/cache/cache_obj.c
@@ -142,6 +142,7 @@ ObjNew(const struct worker *wrk)
 	wrk->stats->n_objectcore++;
 	oc->last_lru = NAN;
 	oc->flags = OC_F_BUSY;
+	VTAILQ_INIT(&oc->waitinglist);
 
 	oc->boc = obj_newboc();
 
@@ -160,6 +161,7 @@ ObjDestroy(const struct worker *wrk, struct objcore **p)
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	TAKE_OBJ_NOTNULL(oc, p, OBJCORE_MAGIC);
+	assert(VTAILQ_EMPTY(&oc->waitinglist));
 	if (oc->boc != NULL)
 		obj_deleteboc(&oc->boc);
 	FREE_OBJ(oc);
diff --git a/bin/varnishd/cache/cache_objhead.h b/bin/varnishd/cache/cache_objhead.h
index bc1782379e3..50d3823ad63 100644
--- a/bin/varnishd/cache/cache_objhead.h
+++ b/bin/varnishd/cache/cache_objhead.h
@@ -40,7 +40,6 @@ struct objhead {
 	struct lock		mtx;
 	VTAILQ_HEAD(,objcore)	objcs;
 	uint8_t			digest[DIGEST_LEN];
-	VTAILQ_HEAD(, req)	waitinglist;
 
 	/*----------------------------------------------------
 	 * The fields below are for the sole private use of
@@ -56,6 +55,17 @@ struct objhead {
 #define hoh_head		_u.n.u_n_hoh_head
 };
 
+enum lookup_e {
+	HSH_MISS,
+	HSH_MISS_EXP,
+	HSH_HITMISS,
+	HSH_HITMISS_EXP,
+	HSH_HITPASS,
+	HSH_HIT,
+	HSH_GRACE,
+	HSH_BUSY,
+};
+
 void HSH_Fail(struct objcore *);
 void HSH_Kill(struct objcore *);
 void HSH_Insert(struct worker *, const void *hash, struct objcore *,
@@ -66,8 +76,7 @@ struct boc *HSH_RefBoc(const struct objcore *);
 void HSH_DerefBoc(struct worker *wrk, struct objcore *);
 void HSH_DeleteObjHead(const struct worker *, struct objhead *);
 
-int HSH_DerefObjCore(struct worker *, struct objcore **, int rushmax);
-#define HSH_RUSH_POLICY -1
+int HSH_DerefObjCore(struct worker *, struct objcore **);
 
 enum lookup_e HSH_Lookup(struct req *, struct objcore **, struct objcore **);
 void HSH_Ref(struct objcore *o);
diff --git a/bin/varnishd/cache/cache_req_body.c b/bin/varnishd/cache/cache_req_body.c
index bca8717656f..2767feda927 100644
--- a/bin/varnishd/cache/cache_req_body.c
+++ b/bin/varnishd/cache/cache_req_body.c
@@ -41,7 +41,6 @@
 #include "vtim.h"
 
 #include "storage/storage.h"
-#include "hash/hash_slinger.h"
 
 /*----------------------------------------------------------------------
  * Pull the req.body in via/into a objcore
@@ -82,7 +81,7 @@ vrb_pull(struct req *req, ssize_t maxsize, objiterate_f *func, void *priv)
 	if (STV_NewObject(req->wrk, req->body_oc, stv, hint) == 0) {
 		req->req_body_status = BS_ERROR;
 		HSH_DerefBoc(req->wrk, req->body_oc);
-		AZ(HSH_DerefObjCore(req->wrk, &req->body_oc, 0));
+		AZ(HSH_DerefObjCore(req->wrk, &req->body_oc));
 		(void)VFP_Error(vfc, "Object allocation failed:"
 		    " Ran out of space in %s", stv->vclname);
 		return (-1);
@@ -96,7 +95,7 @@ vrb_pull(struct req *req, ssize_t maxsize, objiterate_f *func, void *priv)
 	if (VFP_Open(ctx, vfc) < 0) {
 		req->req_body_status = BS_ERROR;
 		HSH_DerefBoc(req->wrk, req->body_oc);
-		AZ(HSH_DerefObjCore(req->wrk, &req->body_oc, 0));
+		AZ(HSH_DerefObjCore(req->wrk, &req->body_oc));
 		return (-1);
 	}
 
@@ -142,7 +141,7 @@ vrb_pull(struct req *req, ssize_t maxsize, objiterate_f *func, void *priv)
 	VSLb_ts_req(req, "ReqBody", VTIM_real());
 	if (func != NULL) {
 		HSH_DerefBoc(req->wrk, req->body_oc);
-		AZ(HSH_DerefObjCore(req->wrk, &req->body_oc, 0));
+		AZ(HSH_DerefObjCore(req->wrk, &req->body_oc));
 		if (vfps != VFP_END) {
 			req->req_body_status = BS_ERROR;
 			if (r == 0)
@@ -156,7 +155,7 @@ vrb_pull(struct req *req, ssize_t maxsize, objiterate_f *func, void *priv)
 
 	if (vfps != VFP_END) {
 		req->req_body_status = BS_ERROR;
-		AZ(HSH_DerefObjCore(req->wrk, &req->body_oc, 0));
+		AZ(HSH_DerefObjCore(req->wrk, &req->body_oc));
 		return (-1);
 	}
 
@@ -277,7 +276,7 @@ VRB_Free(struct req *req)
 	if (req->body_oc == NULL)
 		return;
 
-	r = HSH_DerefObjCore(req->wrk, &req->body_oc, 0);
+	r = HSH_DerefObjCore(req->wrk, &req->body_oc);
 
 	// each busyobj may have gained a reference
 	assert (r >= 0);
diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c
index 789cea5408a..80b933792dc 100644
--- a/bin/varnishd/cache/cache_req_fsm.c
+++ b/bin/varnishd/cache/cache_req_fsm.c
@@ -46,7 +46,6 @@
 #include "cache_transport.h"
 #include "vcc_interface.h"
 
-#include "hash/hash_slinger.h"
 #include "http1/cache_http1.h"
 #include "storage/storage.h"
 #include "vcl.h"
@@ -220,7 +219,7 @@ cnt_deliver(struct worker *wrk, struct req *req)
 	ObjTouch(req->wrk, req->objcore, req->t_prev);
 
 	if (Resp_Setup_Deliver(req)) {
-		(void)HSH_DerefObjCore(wrk, &req->objcore, HSH_RUSH_POLICY);
+		(void)HSH_DerefObjCore(wrk, &req->objcore);
 		req->err_code = 500;
 		req->req_step = R_STP_SYNTH;
 		return (REQ_FSM_MORE);
@@ -240,7 +239,7 @@ cnt_deliver(struct worker *wrk, struct req *req)
 
 	if (wrk->vpi->handling != VCL_RET_DELIVER) {
 		HSH_Cancel(wrk, req->objcore, NULL);
-		(void)HSH_DerefObjCore(wrk, &req->objcore, HSH_RUSH_POLICY);
+		(void)HSH_DerefObjCore(wrk, &req->objcore);
 		http_Teardown(req->resp);
 
 		switch (wrk->vpi->handling) {
@@ -401,7 +400,7 @@ cnt_synth(struct worker *wrk, struct req *req)
 		VSLb(req->vsl, SLT_Error, "Could not get storage");
 		req->doclose = SC_OVERLOAD;
 		VSLb_ts_req(req, "Resp", W_TIM_real(wrk));
-		(void)HSH_DerefObjCore(wrk, &req->objcore, 1);
+		(void)HSH_DerefObjCore(wrk, &req->objcore);
 		http_Teardown(req->resp);
 		return (REQ_FSM_DONE);
 	}
@@ -500,7 +499,7 @@ cnt_transmit(struct worker *wrk, struct req *req)
 	if (boc != NULL)
 		HSH_DerefBoc(wrk, req->objcore);
 
-	(void)HSH_DerefObjCore(wrk, &req->objcore, HSH_RUSH_POLICY);
+	(void)HSH_DerefObjCore(wrk, &req->objcore);
 	http_Teardown(req->resp);
 
 	req->filter_list = NULL;
@@ -527,7 +526,7 @@ cnt_fetch(struct worker *wrk, struct req *req)
 	if (req->objcore->flags & OC_F_FAILED) {
 		req->err_code = 503;
 		req->req_step = R_STP_SYNTH;
-		(void)HSH_DerefObjCore(wrk, &req->objcore, 1);
+		(void)HSH_DerefObjCore(wrk, &req->objcore);
 		AZ(req->objcore);
 		return (REQ_FSM_MORE);
 	}
@@ -546,7 +545,7 @@ cnt_lookup(struct worker *wrk, struct req *req)
 {
 	struct objcore *oc, *busy;
 	enum lookup_e lr;
-	int had_objhead = 0;
+	int had_objcore = 0;
 
 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
 	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
@@ -558,8 +557,8 @@ cnt_lookup(struct worker *wrk, struct req *req)
 	VRY_Prep(req);
 
 	AZ(req->objcore);
-	if (req->hash_objhead)
-		had_objhead = 1;
+	if (req->hash_oc)
+		had_objcore = 1;
 	wrk->strangelove = 0;
 	lr = HSH_Lookup(req, &oc, &busy);
 	if (lr == HSH_BUSY) {
@@ -575,7 +574,7 @@ cnt_lookup(struct worker *wrk, struct req *req)
 	if ((unsigned)wrk->strangelove >= cache_param->vary_notice)
 		VSLb(req->vsl, SLT_Notice, "vsl: High number of variants (%d)",
 		    wrk->strangelove);
-	if (had_objhead)
+	if (had_objcore)
 		VSLb_ts_req(req, "Waitinglist", W_TIM_real(wrk));
 
 	if (req->vcf != NULL) {
@@ -591,26 +590,33 @@ cnt_lookup(struct worker *wrk, struct req *req)
 	}
 
 	AZ(req->objcore);
-	if (lr == HSH_MISS || lr == HSH_HITMISS) {
+	switch (lr) {
+	case HSH_HITMISS:
+	case HSH_HITMISS_EXP:
+		req->is_hitmiss = 1;
+		/* FALL_THROUGH */
+	case HSH_MISS:
+	case HSH_MISS_EXP:
 		AN(busy);
 		AN(busy->flags & OC_F_BUSY);
 		req->objcore = busy;
 		req->stale_oc = oc;
 		req->req_step = R_STP_MISS;
-		if (lr == HSH_HITMISS)
-			req->is_hitmiss = 1;
 		return (REQ_FSM_MORE);
-	}
-
-	if (lr == HSH_HITPASS) {
+	case HSH_HITPASS:
 		AZ(busy);
 		AZ(oc);
 		req->req_step = R_STP_PASS;
 		req->is_hitpass = 1;
 		return (REQ_FSM_MORE);
+	case HSH_HIT:
+	case HSH_GRACE:
+		break;
+	case HSH_BUSY:
+	default:
+		WRONG("Invalid lookup result");
 	}
 
-	assert(lr == HSH_HIT || lr == HSH_GRACE);
-
 	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
 	AZ(oc->flags & OC_F_BUSY);
 	req->objcore = oc;
@@ -655,10 +661,10 @@ cnt_lookup(struct worker *wrk, struct req *req)
 	}
 
 	/* Drop our object, we won't need it */
-	(void)HSH_DerefObjCore(wrk, &req->objcore, HSH_RUSH_POLICY);
+	(void)HSH_DerefObjCore(wrk, &req->objcore);
 
 	if (busy != NULL) {
-		(void)HSH_DerefObjCore(wrk, &busy, 0);
+		(void)HSH_DerefObjCore(wrk, &busy);
 		VRY_Clear(req);
 	}
 
@@ -685,7 +691,7 @@ cnt_miss(struct worker *wrk, struct req *req)
 		wrk->stats->cache_miss++;
 		VBF_Fetch(wrk, req, req->objcore, req->stale_oc, VBF_NORMAL);
 		if (req->stale_oc != NULL)
-			(void)HSH_DerefObjCore(wrk, &req->stale_oc, 0);
+			(void)HSH_DerefObjCore(wrk, &req->stale_oc);
 		req->req_step = R_STP_FETCH;
 		return (REQ_FSM_MORE);
 	case VCL_RET_FAIL:
@@ -705,8 +711,8 @@ cnt_miss(struct worker *wrk, struct req *req)
 	}
 	VRY_Clear(req);
 	if (req->stale_oc != NULL)
-		(void)HSH_DerefObjCore(wrk, &req->stale_oc, 0);
-	AZ(HSH_DerefObjCore(wrk, &req->objcore, 1));
+		(void)HSH_DerefObjCore(wrk, &req->stale_oc);
+	AZ(HSH_DerefObjCore(wrk, &req->objcore));
 	return (REQ_FSM_MORE);
 }
 
@@ -1068,14 +1074,14 @@ cnt_purge(struct worker *wrk, struct req *req)
 	AZ(req->objcore);
 	req->hash_always_miss = 1;
 	lr = HSH_Lookup(req, &oc, &boc);
-	assert (lr == HSH_MISS);
+	assert(lr == HSH_MISS);
 	AZ(oc);
 	CHECK_OBJ_NOTNULL(boc, OBJCORE_MAGIC);
 	VRY_Finish(req, DISCARD);
 
 	(void)HSH_Purge(wrk, boc->objhead, req->t_req, 0, 0, 0);
 
-	AZ(HSH_DerefObjCore(wrk, &boc, 1));
+	AZ(HSH_DerefObjCore(wrk, &boc));
 
 	VCL_purge_method(req->vcl, wrk, req, NULL, NULL);
 	switch (wrk->vpi->handling) {
diff --git a/bin/varnishd/cache/cache_vary.c b/bin/varnishd/cache/cache_vary.c
index 101eca231b0..06956441fd3 100644
--- a/bin/varnishd/cache/cache_vary.c
+++ b/bin/varnishd/cache/cache_vary.c
@@ -224,7 +224,7 @@ vry_cmp(const uint8_t *v1, const uint8_t *v2)
 void
 VRY_Prep(struct req *req)
 {
-	if (req->hash_objhead == NULL) {
+	if (req->hash_oc == NULL) {
 		/* Not a waiting list return */
 		AZ(req->vary_b);
 		AZ(req->vary_e);
diff --git a/bin/varnishd/cache/cache_vrt_var.c b/bin/varnishd/cache/cache_vrt_var.c
index 8810fcc513d..3f1f3dd05d0 100644
--- a/bin/varnishd/cache/cache_vrt_var.c
+++ b/bin/varnishd/cache/cache_vrt_var.c
@@ -614,7 +614,7 @@ VRT_u_bereq_body(VRT_CTX)
 	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
 	CHECK_OBJ_NOTNULL(ctx->bo, BUSYOBJ_MAGIC);
 	if (ctx->bo->bereq_body != NULL) {
-		(void)HSH_DerefObjCore(ctx->bo->wrk, &ctx->bo->bereq_body, 0);
+		(void)HSH_DerefObjCore(ctx->bo->wrk, &ctx->bo->bereq_body);
 		http_Unset(ctx->bo->bereq, H_Content_Length);
 	}
 
diff --git a/bin/varnishd/hash/hash_slinger.h b/bin/varnishd/hash/hash_slinger.h
index a1a9c0e8214..1f0e980a7cb 100644
--- a/bin/varnishd/hash/hash_slinger.h
+++ b/bin/varnishd/hash/hash_slinger.h
@@ -51,16 +51,6 @@ struct hash_slinger {
 	hash_deref_f		*deref;
 };
 
-enum lookup_e {
-	HSH_CONTINUE,
-	HSH_MISS,
-	HSH_BUSY,
-	HSH_HIT,
-	HSH_HITMISS,
-	HSH_HITPASS,
-	HSH_GRACE
-};
-
 /* mgt_hash.c */
 void HSH_config(const char *);
 
diff --git a/bin/varnishd/storage/storage_lru.c b/bin/varnishd/storage/storage_lru.c
index 5e046e79468..ec0369c8173 100644
--- a/bin/varnishd/storage/storage_lru.c
+++ b/bin/varnishd/storage/storage_lru.c
@@ -205,6 +205,6 @@ LRU_NukeOne(struct worker *wrk, struct lru *lru)
 	ObjSlim(wrk, oc);
 	VSLb(wrk->vsl, SLT_ExpKill, "LRU xid=%ju",
 	    VXID(ObjGetXID(wrk, oc)));
-	(void)HSH_DerefObjCore(wrk, &oc, 0);	// Ref from HSH_Snipe
+	(void)HSH_DerefObjCore(wrk, &oc);	// Ref from HSH_Snipe
 	return (1);
 }
diff --git a/bin/varnishd/storage/storage_persistent_silo.c b/bin/varnishd/storage/storage_persistent_silo.c
index 81c24b4367f..eb96acd3adb 100644
--- a/bin/varnishd/storage/storage_persistent_silo.c
+++ b/bin/varnishd/storage/storage_persistent_silo.c
@@ -178,7 +178,7 @@ smp_load_seg(struct worker *wrk, const struct smp_sc *sc,
 		HSH_Insert(wrk, so->hash, oc, ban);
 		AN(oc->ban);
 		HSH_DerefBoc(wrk, oc);	// XXX Keep it an stream resurrection?
-		(void)HSH_DerefObjCore(wrk, &oc, HSH_RUSH_POLICY);
+		(void)HSH_DerefObjCore(wrk, &oc);
 		wrk->stats->n_vampireobject++;
 	}
 	Pool_Sumstat(wrk);
diff --git a/bin/varnishtest/tests/c00097.vtc b/bin/varnishtest/tests/c00097.vtc
index afc77fbd91f..796c6481cc6 100644
--- a/bin/varnishtest/tests/c00097.vtc
+++ b/bin/varnishtest/tests/c00097.vtc
@@ -53,7 +53,7 @@ client c4 {
 } -start
 
 # Wait until c2-c4 are on the waitinglist
-delay 1
+varnish v1 -vsl_catchup
 varnish v1 -expect busy_sleep == 3
 
 # Open up the response headers from s1, and as a result HSH_Unbusy
diff --git a/bin/varnishtest/tests/c00098.vtc b/bin/varnishtest/tests/c00098.vtc
index 5b34d3a75ad..14824527521 100644
--- a/bin/varnishtest/tests/c00098.vtc
+++ b/bin/varnishtest/tests/c00098.vtc
@@ -70,17 +70,17 @@ server s6 {
 varnish v1 -arg "-p thread_pools=1" -arg "-p thread_pool_min=30" -arg "-p rush_exponent=2" -arg "-p debug=+syncvsl" -arg "-p debug=+waitinglist" -vcl+backend {
 	sub vcl_backend_fetch {
-		if (bereq.http.client == "1") {
+		if (bereq.http.user-agent == "c1") {
 			set bereq.backend = s1;
-		} else if (bereq.http.client == "2") {
+		} else if (bereq.http.user-agent == "c2") {
 			set bereq.backend = s2;
-		} else if (bereq.http.client == "3") {
+		} else if (bereq.http.user-agent == "c3") {
 			set bereq.backend = s3;
-		} else if (bereq.http.client == "4") {
+		} else if (bereq.http.user-agent == "c4") {
 			set bereq.backend = s4;
-		} else if (bereq.http.client == "5") {
+		} else if (bereq.http.user-agent == "c5") {
 			set bereq.backend = s5;
-		} else if (bereq.http.client == "6") {
+		} else if (bereq.http.user-agent == "c6") {
 			set bereq.backend = s6;
 		}
 	}
 
@@ -90,7 +90,7 @@ varnish v1 -arg "-p thread_pools=1" -arg "-p thread_pool_min=30" -arg "-p rush_e
 } -start
 
 client c1 {
-	txreq -url /hfp -hdr "Client: 1"
+	txreq
 	rxresp
 } -start
 
@@ -98,32 +98,32 @@ barrier b1 sync
 
 client c2 {
-	txreq -url /hfp -hdr "Client: 2"
+	txreq
 	rxresp
 } -start
 
 client c3 {
-	txreq -url /hfp -hdr "Client: 3"
+	txreq
 	rxresp
 } -start
 
 client c4 {
-	txreq -url /hfp -hdr "Client: 4"
+	txreq
 	rxresp
 } -start
 
 client c5 {
-	txreq -url /hfp -hdr "Client: 5"
+	txreq
 	rxresp
 } -start
 
 client c6 {
-	txreq -url /hfp -hdr "Client: 6"
+	txreq
 	rxresp
 } -start
 
 # Wait until c2-c6 are on the waitinglist
-delay 1
+varnish v1 -vsl_catchup
 varnish v1 -expect busy_sleep == 5
 
 # Open up the response headers from s1, and as a result HSH_Unbusy
diff --git a/bin/varnishtest/tests/c00099.vtc b/bin/varnishtest/tests/c00099.vtc
index 4bbd904a021..772279f923c 100644
--- a/bin/varnishtest/tests/c00099.vtc
+++ b/bin/varnishtest/tests/c00099.vtc
@@ -68,19 +68,22 @@ server s6 {
 	chunkedlen 0
 } -start
 
-varnish v1 -arg "-p thread_pools=1" -arg "-p thread_pool_min=30" -arg "-p rush_exponent=2" -arg "-p debug=+syncvsl" -arg "-p debug=+waitinglist" -vcl+backend {
+varnish v1 -cliok "param.set thread_pools 1"
+varnish v1 -cliok "param.set rush_exponent 2"
+varnish v1 -cliok "param.set debug +syncvsl,+waitinglist"
+varnish v1 -vcl+backend {
 	sub vcl_backend_fetch {
-		if (bereq.http.client == "1") {
+		if (bereq.http.user-agent == "c1") {
 			set bereq.backend = s1;
-		} else if (bereq.http.client == "2") {
+		} else if (bereq.http.user-agent == "c2") {
 			set bereq.backend = s2;
-		} else if (bereq.http.client == "3") {
+		} else if (bereq.http.user-agent == "c3") {
 			set bereq.backend = s3;
-		} else if (bereq.http.client == "4") {
+		} else if (bereq.http.user-agent == "c4") {
 			set bereq.backend = s4;
-		} else if (bereq.http.client == "5") {
+		} else if (bereq.http.user-agent == "c5") {
 			set bereq.backend = s5;
-		} else if (bereq.http.client == "6") {
+		} else if (bereq.http.user-agent == "c6") {
 			set bereq.backend = s6;
 		}
 	}
@@ -90,40 +93,71 @@ varnish v1 -arg "-p thread_pools=1" -arg "-p thread_pool_min=30" -arg "-p rush_e
 } -start
 
 client c1 {
-	txreq -url /hfm -hdr "Client: 1"
+	txreq
 	rxresp
 } -start
 
 # This makes sure that c1->s1 is done first
 barrier b1 sync
 
+# This will ensure that c{2..6} enter c1's waiting list in order.
+logexpect l2 -v v1 -g raw {
+	expect * * ReqHeader "User-Agent: c2"
+	expect * = Debug "on waiting list"
+} -start
+logexpect l3 -v v1 -g raw {
+	expect * * ReqHeader "User-Agent: c3"
+	expect * = Debug "on waiting list"
+} -start
+logexpect l4 -v v1 -g raw {
+	expect * * ReqHeader "User-Agent: c4"
+	expect * = Debug "on waiting list"
+} -start
+logexpect l5 -v v1 -g raw {
+	expect * * ReqHeader "User-Agent: c5"
+	expect * = Debug "on waiting list"
+} -start
+logexpect l6 -v v1 -g raw {
+	expect * * ReqHeader "User-Agent: c6"
+	expect * = Debug "on waiting list"
+} -start
+
 client c2 {
-	txreq -url /hfm -hdr "Client: 2"
+	txreq
 	rxresp
 } -start
 
+logexpect l2 -wait
+
 client c3 {
-	txreq -url /hfm -hdr "Client: 3"
+	txreq
 	rxresp
 } -start
 
+logexpect l3 -wait
+
 client c4 {
-	txreq -url /hfm -hdr "Client: 4"
+	txreq
 	rxresp
 } -start
 
+logexpect l4 -wait
+
 client c5 {
-	txreq -url /hfm -hdr "Client: 5"
+	txreq
 	rxresp
 } -start
 
+logexpect l5 -wait
+
 client c6 {
-	txreq -url /hfm -hdr "Client: 6"
+	txreq
 	rxresp
 } -start
 
-# Wait until c2-c6 are on the waitinglist
-delay 1
+logexpect l6 -wait
+
+varnish v1 -vsl_catchup
 varnish v1 -expect busy_sleep == 5
 
 # Open up the response headers from s1, and as a result HSH_Unbusy
@@ -135,3 +169,19 @@ client c3 -wait
 client c4 -wait
 client c5 -wait
 client c6 -wait
+
+varnish v1 -vsl_catchup
+
+# Check the effect of rush_exponent=2, with limited VXID guarantees.
+
+logexpect l1 -v v1 -g raw -d 1 -q "vxid != 0" -i Debug {
+	expect * 1002 Debug "waiting list rush for req 1004"
+	expect 0 = Debug "waiting list rush for req 1006"
+
+	# triggered by 1004 or 1006
+	expect * * Debug "waiting list rush for req 1008"
+	expect 0 = Debug "waiting list rush for req 1010"
+
+	# triggered by any VXID except 1002
+	expect * * Debug "waiting list rush for req 1012"
+} -run
diff --git a/bin/varnishtest/tests/c00125.vtc b/bin/varnishtest/tests/c00125.vtc
new file mode 100644
index 00000000000..8fbca4f46b2
--- /dev/null
+++ b/bin/varnishtest/tests/c00125.vtc
@@ -0,0 +1,156 @@
+varnishtest "successful expired waiting list hit"
+
+barrier b1 cond 2
+barrier b2 cond 2
+barrier b3 cond 2
+barrier b4 cond 2
+
+
+server s1 {
+	rxreq
+	expect req.http.user-agent == c1
+	expect req.http.bgfetch == false
+	barrier b1 sync
+	barrier b2 sync
+	txresp -hdr "Cache-Control: max-age=60" -hdr "Age: 120"
+
+	rxreq
+	expect req.http.user-agent == c3
+	expect req.http.bgfetch == true
+	txresp
+
+	# The no-cache case only works with a complicit VCL, for now.
+	rxreq
+	expect req.http.user-agent == c4
+	expect req.http.bgfetch == false
+	barrier b3 sync
+	barrier b4 sync
+	txresp -hdr "Cache-Control: no-cache"
+
+	rxreq
+	expect req.http.user-agent == c6
+	expect req.http.bgfetch == false
+	txresp -hdr "Cache-Control: no-cache"
+} -start
+
+varnish v1 -cliok "param.set default_grace 1h"
+varnish v1 -cliok "param.set thread_pools 1"
+varnish v1 -cliok "param.set debug +syncvsl,+waitinglist"
+varnish v1 -vcl+backend {
+	sub vcl_backend_fetch {
+		set bereq.http.bgfetch = bereq.is_bgfetch;
+	}
+	sub vcl_beresp_stale {
+		# We just validated a stale object, do not mark it as
+		# uncacheable. The object remains available for grace
+		# hits and background fetches.
+		return;
+	}
+	sub vcl_beresp_control {
+		if (beresp.http.cache-control == "no-cache") {
+			# Keep beresp.uncacheable clear.
+			return;
+		}
+	}
+	sub vcl_deliver {
+		set resp.http.obj-hits = obj.hits;
+		set resp.http.obj-ttl = obj.ttl;
+	}
+} -start
+
+client c1 {
+	txreq -url "/stale-hit"
+	rxresp
+	expect resp.status == 200
+	expect resp.http.x-varnish == 1001
+	expect resp.http.obj-hits == 0
+	expect resp.http.obj-ttl < 0
+} -start
+
+barrier b1 sync
+
+client c2 {
+	txreq -url "/stale-hit"
+	rxresp
+	expect resp.status == 200
+	expect resp.http.x-varnish == "1004 1002"
+	expect resp.http.obj-hits == 1
+	expect resp.http.obj-ttl < 0
+} -start
+
+varnish v1 -expect busy_sleep == 1
+barrier b2 sync
+
+client c1 -wait
+client c2 -wait
+
+varnish v1 -vsl_catchup
+
+varnish v1 -expect cache_miss == 1
+varnish v1 -expect cache_hit == 1
+varnish v1 -expect cache_hit_grace == 0
+varnish v1 -expect s_bgfetch == 0
+
+client c3 {
+	txreq -url "/stale-hit"
+	rxresp
+	expect resp.status == 200
+	expect resp.http.x-varnish == "1006 1002"
+	expect resp.http.obj-hits == 2
+	expect resp.http.obj-ttl < 0
+} -run
+
+varnish v1 -vsl_catchup
+
+varnish v1 -expect cache_miss == 1
+varnish v1 -expect cache_hit == 2
+varnish v1 -expect cache_hit_grace == 1
+varnish v1 -expect s_bgfetch == 1
+
+# The only way for a plain no-cache to be hit is to have a non-zero keep.
+varnish v1 -cliok "param.set default_ttl 0"
+varnish v1 -cliok "param.set default_grace 0"
+varnish v1 -cliok "param.set default_keep 1h"
+
+client c4 {
+	txreq -url "/no-cache-hit"
+	rxresp
+	expect resp.status == 200
+	expect resp.http.x-varnish == 1009
+	expect resp.http.obj-hits == 0
+	expect resp.http.obj-ttl <= 0
+} -start
+
+barrier b3 sync
+
+client c5 {
+	txreq -url "/no-cache-hit"
+	rxresp
+	expect resp.status == 200
+	expect resp.http.x-varnish == "1012 1010"
+	expect resp.http.obj-hits == 1
+	expect resp.http.obj-ttl <= 0
+} -start
+
+varnish v1 -expect busy_sleep == 2
+barrier b4 sync
+
+client c4 -wait
+client c5 -wait
+
+varnish v1 -vsl_catchup
+
+varnish v1 -expect cache_miss == 2
+varnish v1 -expect cache_hit == 3
+varnish v1 -expect cache_hit_grace == 1
+varnish v1 -expect s_bgfetch == 1
+
+# No hit when not on the waiting list
+client c6 {
+	txreq -url "/no-cache-hit"
+	rxresp
+	expect resp.status == 200
+	expect resp.http.x-varnish == 1014
+	expect resp.http.obj-hits == 0
+	expect resp.http.obj-ttl <= 0
+} -run
diff --git a/include/tbl/params.h b/include/tbl/params.h
index 83054eef3de..c64b22d64d3 100644
--- a/include/tbl/params.h
+++ b/include/tbl/params.h
@@ -800,7 +800,7 @@ PARAM_SIMPLE(
 	/* descr */
 	"How many parked request we start for each completed request on "
 	"the object.\n"
-	"NB: Even with the implict delay of delivery, this parameter "
+	"NB: Even with the implicit delay of delivery, this parameter "
 	"controls an exponential increase in number of worker threads.",
 	/* flags */ EXPERIMENTAL
 )
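Note on hsh_rush_move(): the double VTAILQ_CONCAT can look like a no-op at
first glance. The standalone sketch below is not part of the patch; it uses
the plain BSD-style <sys/queue.h> TAILQ macros and assumes TAILQ_CONCAT is
available there (Varnish itself uses its VTAILQ wrappers). It shows how the
two concatenations splice the old waiting list in front of the new one:

#include <stdio.h>
#include <sys/queue.h>

struct waiter {
	const char *name;
	TAILQ_ENTRY(waiter) w_list;
};

TAILQ_HEAD(wlist, waiter);

int
main(void)
{
	struct wlist oldq = TAILQ_HEAD_INITIALIZER(oldq);
	struct wlist newq = TAILQ_HEAD_INITIALIZER(newq);
	struct waiter a = { .name = "old-1" }, b = { .name = "old-2" };
	struct waiter x = { .name = "new-1" }, y = { .name = "new-2" };
	struct waiter *w;

	TAILQ_INSERT_TAIL(&oldq, &a, w_list);
	TAILQ_INSERT_TAIL(&oldq, &b, w_list);
	TAILQ_INSERT_TAIL(&newq, &x, w_list);
	TAILQ_INSERT_TAIL(&newq, &y, w_list);

	/* Append newq to the tail of oldq; newq becomes empty. */
	TAILQ_CONCAT(&oldq, &newq, w_list);
	/* Move the combined list back; the old waiters now lead. */
	TAILQ_CONCAT(&newq, &oldq, w_list);

	TAILQ_FOREACH(w, &newq, w_list)
		printf("%s\n", w->name);	/* old-1 old-2 new-1 new-2 */
	return (0);
}

The first concat drains the new list onto the tail of the old one; the second
moves the combined list back, so requests that were already parked keep their
head-of-line position, which is what the "show up first" comment in the patch
refers to.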
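Note on the rush_exponent behaviour that c00099.vtc pins down: with the new
hsh_rush1(), a serviceable objcore rushes the whole waiting list at once
(max = INT_MAX), while an unusable one (oc->flags != 0, e.g. hit-for-miss)
wakes at most rush_exponent waiters per reference drop; every woken request
drops its own reference later and wakes more, so wakeups cascade
geometrically. A back-of-the-envelope sketch (a hypothetical model, not
Varnish code) for the five parked clients in the test:

#include <stdio.h>

int
main(void)
{
	const int r = 2;	/* rush_exponent, as set in c00099.vtc */
	int pending = 5;	/* c2..c6 parked behind c1 */
	int derefs = 1;		/* generation 0: the fetch task itself */
	int gen = 0;

	while (pending > 0) {
		int cap = derefs * r;	/* each deref wakes at most r */
		int woken = pending < cap ? pending : cap;

		gen++;
		printf("generation %d: %d woken (cap %d)\n", gen, woken, cap);
		pending -= woken;
		derefs = woken;		/* the woken requests deref next */
	}
	return (0);
}

With r = 2 this prints "generation 1: 2 woken" then "generation 2: 3 woken",
matching the logexpect at the end of the test: 1002 rushes 1004 and 1006,
which between them rush 1008, 1010 and 1012.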