Create script to upload test aggregation data (#97954)

<!--
copilot:summary
-->
### <samp>🤖 Generated by Copilot at 79f1b37</samp>

This pull request improves the workflow and data processing for uploading contribution and testing statistics to Rockset and S3. It renames and updates a workflow file, removes unused code from a script, and adds a new script to aggregate and upload test results.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/97954
Approved by: https://github.com/huydhn
diff --git a/.github/workflows/upload-contrib-stats.yml b/.github/workflows/nightly-rockset-uploads.yml
similarity index 78%
rename from .github/workflows/upload-contrib-stats.yml
rename to .github/workflows/nightly-rockset-uploads.yml
index 95f8e96..6977b62 100644
--- a/.github/workflows/upload-contrib-stats.yml
+++ b/.github/workflows/nightly-rockset-uploads.yml
@@ -1,4 +1,4 @@
-name: Upload contribution stats
+name: Nightly Upload to rockset
 
 on:
   schedule:
@@ -11,11 +11,14 @@
 
 jobs:
 
-  upload-contribution-stats:
+  upload-stats-to-rockset:
     runs-on: [self-hosted, linux.2xlarge]
     steps:
       - name: Checkout PyTorch
         uses: pytorch/pytorch/.github/actions/checkout-pytorch@master
+        with:
+          fetch-depth: 1
+          submodules: false
 
       - run: |
           pip3 install requests==2.26
@@ -32,5 +35,7 @@
           max_attempts: 5
           retry_wait_seconds: 90
           command: |
+            echo "Uploading testing aggregate data" "$(date -d yesterday '+%Y-%m-%d')"
+            python3 -m tools.stats.upload_test_stat_aggregates --date "$(date -d yesterday '+%Y-%m-%d')"
             echo "Uploading external contribution stats for" "$(date -d yesterday '+%Y-%m-%d')"
             python3 -m tools.stats.upload_external_contrib_stats --startDate "$(date -d yesterday '+%Y-%m-%d')"
\ No newline at end of file
diff --git a/tools/stats/upload_external_contrib_stats.py b/tools/stats/upload_external_contrib_stats.py
index a371591..9196e3a 100644
--- a/tools/stats/upload_external_contrib_stats.py
+++ b/tools/stats/upload_external_contrib_stats.py
@@ -93,7 +93,6 @@
                 "date": str(period_begin_date),
                 "pr_count": pr_count,
                 "user_count": len(users),
-                "users": list(users),
             }
         )
         period_begin_date = period_end_date + datetime.timedelta(days=1)
diff --git a/tools/stats/upload_test_stat_aggregates.py b/tools/stats/upload_test_stat_aggregates.py
new file mode 100644
index 0000000..1eb67f4
--- /dev/null
+++ b/tools/stats/upload_test_stat_aggregates.py
@@ -0,0 +1,84 @@
+import argparse
+import ast
+import datetime
+import json
+import os
+import re
+from typing import Any, List, Union
+
+import rockset  # type: ignore[import]
+
+from tools.stats.upload_stats_lib import upload_to_s3
+
+
def get_oncall_from_testfile(testfile: str) -> Union[List[str], None]:
    """Return the oncall owners declared in a test file's ``# Owner(s): `` header.

    Looks for a line starting with ``# Owner(s): `` inside ``test/<testfile>.py``
    and parses the python list literal on that line.

    Returns:
        - the parsed owner list when exactly one well-formed list is found;
        - ``["module: <prefix>"]`` (the part of ``testfile`` before the first
          ``.``) when the file is missing or the header is malformed;
        - ``["module: unmarked"]`` for the same failures when ``testfile`` has
          no ``.``;
        - ``None`` when the file opens fine but has no owner header at all.
    """
    path = f"test/{testfile}"
    if not path.endswith(".py"):
        path += ".py"
    try:
        with open(path) as f:
            for line in f:
                if not line.startswith("# Owner(s): "):
                    continue
                possible_lists = re.findall(r"\[.*\]", line)
                # Malformed header (zero or multiple lists) falls through to
                # the module-name fallback below, same as a parse failure.
                if len(possible_lists) != 1:
                    raise ValueError(
                        "Expected exactly one oncall list in the Owner(s) header"
                    )
                oncalls = ast.literal_eval(possible_lists[0])
                return list(oncalls)
    except Exception:
        # Best-effort fallback: derive a pseudo-oncall from the dotted module
        # prefix so unreadable/badly-formatted files still get categorized.
        if "." in testfile:
            return [f"module: {testfile.split('.')[0]}"]
        return ["module: unmarked"]
    # File exists but carries no Owner(s) annotation.
    return None
+
+
def get_test_stat_aggregates(date: datetime.date) -> Any:
    """Fetch per-day test-stat aggregates from Rockset and tag them with oncalls.

    Executes the ``test_insights_per_daily_upload`` query lambda (pinned
    version) for the given ``date``, annotates every result row with the
    oncalls derived from its ``test_file``, and returns a list of plain
    JSON-serializable dicts.

    Requires the ``ROCKSET_API_KEY`` environment variable; raises ``KeyError``
    if it is unset.
    """
    rs = rockset.RocksetClient(
        host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
    )
    query_parameters = [
        rockset.models.QueryParameter(
            name="startTime", type="string", value=date.isoformat()
        )
    ]
    api_response = rs.QueryLambdas.execute_query_lambda(
        query_lambda="test_insights_per_daily_upload",
        version="865e3748f31e9b59",
        parameters=query_parameters,
    )
    results = api_response["results"]
    for row in results:
        row["oncalls"] = get_oncall_from_testfile(row["test_file"])
    # Round-trip through json with default=str to coerce any non-serializable
    # values (e.g. datetimes from Rockset) into strings before upload.
    return json.loads(json.dumps(results, indent=4, sort_keys=True, default=str))
+
+
if __name__ == "__main__":
    # CLI entry point: validate the requested date, build the aggregates,
    # and push them to the aggregated-stats S3 bucket.
    arg_parser = argparse.ArgumentParser(
        description="Upload test stat aggregates to Rockset."
    )
    arg_parser.add_argument(
        "--date",
        type=datetime.date.fromisoformat,
        required=True,
        help="Date to upload test stat aggregates for (YYYY-MM-DD). Must be in the last 30 days",
    )
    cli_args = arg_parser.parse_args()
    # Rockset only retains the source data for a limited window, so refuse
    # dates older than 30 days up front.
    oldest_allowed = datetime.datetime.now().date() - datetime.timedelta(days=30)
    if cli_args.date < oldest_allowed:
        raise ValueError("date must be in the last 30 days")
    aggregates = get_test_stat_aggregates(date=cli_args.date)
    upload_to_s3(
        bucket_name="torchci-aggregated-stats",
        key=f"test_data_aggregates/{str(cli_args.date)}",
        docs=aggregates,
    )