From 50ec241cf744f6eeb7543b84300f4c72e28eaa8b Mon Sep 17 00:00:00 2001 From: fc Date: Fri, 10 May 2024 10:16:31 +0800 Subject: [PATCH] [FIX] fix deadlock in PipeEngine._exec_recv_grads --- deepspeed/runtime/pipe/engine.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/deepspeed/runtime/pipe/engine.py b/deepspeed/runtime/pipe/engine.py index ab4919a0f0abd..5ab4f3a102984 100644 --- a/deepspeed/runtime/pipe/engine.py +++ b/deepspeed/runtime/pipe/engine.py @@ -1206,9 +1206,7 @@ def _exec_recv_grads(self, buffer_id): # branches on is_grad_partitioned so we don't filter out the # metadata tensor. if self.is_grad_partitioned: - sizes_and_dtypes = [(list(t.size()), t.dtype) - for t in outputs[:2]] + [(list(t.size()), t.dtype) - for t in outputs[2:] if t.is_floating_point()] + sizes_and_dtypes = [(list(t.size()), t.dtype) for t in outputs[:2]] else: sizes_and_dtypes = [(list(t.size()), t.dtype) for t in outputs if t.is_floating_point()] self.grad_layer = self._allocate_buffers(sizes_and_dtypes, num_buffers=1)[0]