| #!/usr/bin/env python3 |
| """Calculates the disk size of each repository managed by 'repo'. |
| |
| This script invokes 'repo forall ...' to get the disk usage |
| for each project repository and prints the combined output as a CSV. |
| """ |
| |
| import os |
| import re |
| import subprocess |
| import sys |
| |
| |
| def get_repo_disk_usage() -> dict[str, int]: |
| """Invokes 'repo forall -p -c du -s .' and parses the output into a dictionary. |
| |
| Returns: |
| A dictionary mapping project paths (str) to their disk size in bytes (int). |
| Returns an empty dictionary if the input is empty or malformed. |
| |
| Raises: |
| subprocess.CalledProcessError: If the command returns a non-zero exit |
| code. |
| FileNotFoundError: If the 'repo' command is not found. |
| """ |
| output = subprocess.check_output( |
| ["repo", "forall", "-p", "-c", "du", "-s", "-b", "."], |
| text=True, |
| ) |
| |
| project_sizes: dict[str, int] = {} |
| lines = output.strip().split("\n") |
| current_project_name = None |
| |
| for line in lines: |
| line = line.strip() |
| if not line: |
| continue # Skip empty lines |
| |
| if line.startswith("project "): |
| # Extract project name: remove "project " prefix and trailing "/" |
| current_project_name = line.removeprefix("project ").removesuffix("/") |
| elif current_project_name is not None: |
| match = re.match(r"^(\d+)\s+\.$", line) |
| if not match: |
| continue |
| size_str = match.group(1) |
| project_sizes[current_project_name] = int(size_str) |
| current_project_name = None # Reset for the next project |
| |
| return project_sizes |
| |
| |
| def get_dot_repo_size() -> int: |
| """Gets the disk usage of the '.repo' directory in bytes. |
| |
| Returns: |
| The size of the '.repo' directory in bytes. Returns 0 if the command |
| fails or the directory doesn't exist (du returns 0). |
| |
| Raises: |
| FileNotFoundError: If the 'du' command is not found. |
| # Note: subprocess.CalledProcessError is not explicitly raised on failure |
| # because we want to return 0 in that case. |
| """ |
| |
| result = subprocess.check_output(["du", "-s", "-b", ".repo"], text=True) |
| size_str = result.split()[0] |
| return int(size_str) |
| |
| |
| def main(): |
| if not os.path.isdir(".repo"): |
| sys.exit("Error: .repo directory not found, run inside a repo root.") |
| |
| project_sizes = get_repo_disk_usage() |
| dot_repo_size = get_dot_repo_size() |
| |
| print("project_name,size_bytes") |
| print(f".repo,{dot_repo_size}") |
| for name, size_bytes in sorted(project_sizes.items()): |
| print(f"{name},{size_bytes}") |
| |
| |
| if __name__ == "__main__": |
| main() |