[FSDP] Subtest `CPUOffload` for `test_fsdp_grad_acc.py` (#90545)

In preparation for the next PR, I wanted to reduce the time to run these gradient accumulation tests. This moves the `CPUOffload` parametrization into the `run_subtests` config, so the two offload variants run as subtests within a single test instance instead of doubling the number of parametrized tests.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/90545
Approved by: https://github.com/mrshenli
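
For context, a minimal sketch of the subtest pattern the diff below adopts. `FSDPTest.run_subtests` iterates over the Cartesian product of a config dict inside one already-spawned process group, whereas each extra `@parametrize` value multiplies the number of test instances (and hence process-group setups). The standalone `run_subtests` helper and `fake_grad_acc_test` below are illustrative stand-ins, not the actual test harness:

```python
# Simplified, hypothetical stand-in for FSDPTest.run_subtests
# (torch/testing/_internal/common_fsdp.py): run a test body once per
# combination of the subtest config values, within one process group.
import itertools
from typing import Any, Callable, Dict, List

from torch.distributed.fsdp import CPUOffload


def run_subtests(
    subtest_config: Dict[str, List[Any]],
    test_fn: Callable,
    **common_kwargs: Any,
) -> None:
    names = sorted(subtest_config.keys())
    value_lists = [subtest_config[name] for name in names]
    for values in itertools.product(*value_lists):
        # Each combination is a subtest of the same test instance, so the
        # expensive multiprocess setup is paid only once.
        test_fn(**common_kwargs, **dict(zip(names, values)))


if __name__ == "__main__":
    def fake_grad_acc_test(**kwargs: Any) -> None:
        print("running grad-acc subtest with", kwargs)

    # With `cpu_offload` in the subtest config (as in this PR), both offload
    # variants run back to back rather than as separate parametrized tests.
    run_subtests(
        {
            "cpu_offload": [
                CPUOffload(offload_params=False),
                CPUOffload(offload_params=True),
            ],
        },
        fake_grad_acc_test,
        batch_dim=1,
    )
```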
diff --git a/test/distributed/fsdp/test_fsdp_grad_acc.py b/test/distributed/fsdp/test_fsdp_grad_acc.py
index ef20d2a..78c2731 100644
--- a/test/distributed/fsdp/test_fsdp_grad_acc.py
+++ b/test/distributed/fsdp/test_fsdp_grad_acc.py
@@ -220,7 +220,11 @@
                 None,
                 BackwardPrefetch.BACKWARD_PRE,
                 BackwardPrefetch.BACKWARD_POST,
-            ]
+            ],
+            "cpu_offload": [
+                CPUOffload(offload_params=False),
+                CPUOffload(offload_params=True),
+            ],
         }
 
     @skip_if_lt_x_gpu(2)
@@ -244,10 +248,6 @@
         ],
     )
     @parametrize(
-        "cpu_offload",
-        [CPUOffload(offload_params=False), CPUOffload(offload_params=True)],
-    )
-    @parametrize(
         "sharding_strategy",
         [
             ShardingStrategy.FULL_SHARD,
@@ -258,7 +258,6 @@
     def test_grad_acc(
         self,
         configs: _GradAccConfigs,
-        cpu_offload: CPUOffload,
         sharding_strategy: ShardingStrategy,
     ):
         """
@@ -281,7 +280,6 @@
             self._test_grad_acc,
             batch_dim=1,
             configs=configs.configs,
-            cpu_offload=cpu_offload,
             sharding_strategy=sharding_strategy,
         )