#!/bin/bash
# GitHub has started restricting daily cache usage to 10GB, but only allows
# cache usage to be checked for individual repositories, with no way of getting
# an overview of which repos are using excessive cache.
#
# This script runs the `gh cache` command on selected sub-directories of
# specified paths, filters results with `jq` to the previous 24-hour period,
# and puts all combined results into a single output file in the directory
# where this script is run.
#
# The script should be run in a directory presumed to be two levels above
# where all git repositories sit, so each sub-dir should be `<org>/<repo>`.

if ! command -v jq &>/dev/null; then
    echo "jq must be installed."
    exit 1
fi
if ! command -v gh &>/dev/null; then
    echo "gh must be installed."
    exit 1
fi

json_file="gh-cache-dat.json"    # file stored in each directory from `gh cache`
output_file="combined_data.json" # final output stored in pwd

echo "=== Running 'gh cache' on sub-directories ==="

find . -mindepth 2 -maxdepth 2 -type d | \
    grep -e "mpadge" -e "ropensci/" -e "UrbanAnalyst" -e "ropensci-review-tools" | \
    while read -r dir; do

    cd "$dir" || continue

    # Use '>' rather than '>>' so repeated runs do not concatenate multiple
    # JSON arrays into one invalid document:
    if gh cache list --json createdAt,sizeInBytes > "$json_file" 2>/dev/null; then
        echo "  Generated dat.json in $dir successfully"

        if [ -f "$json_file" ] && [ -s "$json_file" ]; then
            # Note: 'date -d' is GNU-specific; BSD/macOS equivalent is
            # `date -u -v-24H +'%Y-%m-%dT%H:%M:%SZ'`.
            twenty_four_hours_ago=$(date -u -d '24 hours ago' +'%Y-%m-%dT%H:%M:%SZ')
            jq --arg cutoff "$twenty_four_hours_ago" '
                if type == "array" then
                    [.[] | select(.createdAt >= $cutoff)]
                else
                    select(.createdAt >= $cutoff)
                end
            ' "$json_file" > temp.json && mv temp.json "$json_file"
        fi
    else
        echo "  No gh cache from $dir"
    fi

    cd - > /dev/null
done

echo "=== Combining data ==="

temp_file="combined_data_temp.json"
dirname=$(pwd)

echo "[" > "$temp_file"
first_entry=true

find . -mindepth 2 -maxdepth 3 -name "$json_file" | \
    grep -e "mpadge" -e "ropensci/" -e "UrbanAnalyst" -e "ropensci-review-tools" | \
    while read -r json_file; do

    json_path="${dirname}/${json_file#./}"

    if [ -s "$json_path" ] && command -v jq &>/dev/null; then
        json_content=$(cat "$json_path")
        if [ "$json_content" != "[]" ] && [ "$json_content" != "null" ] && [ -n "$json_content" ]; then
            if [ "$first_entry" = false ]; then
                echo "," >> "$temp_file"
            fi
            echo "  jq on ${json_path} ..."
            # Tag each entry with its repo path, then strip the enclosing
            # '[' and ']' lines so entries from all repos join into the one
            # array opened above:
            jq --arg path "$json_path" '
                if type == "array" then
                    [.[] | .path = $path]
                else
                    .path = $path
                end
            ' "$json_path" | sed '1d; $d' >> "$temp_file"
            first_entry=false
        fi
    fi

    rm "$json_path"
done

echo "]" >> "$temp_file"

# Then group results by "path" and store total sum of cache sizes:
if command -v jq &>/dev/null; then
    jq 'group_by(.path) | map({
        path: .[0].path,
        total_size_mb: (((map(.sizeInBytes) | add) / (1024 * 1024)) | round),
        count: length
    })' "$temp_file" > "$output_file"
else
    mv "$temp_file" "$output_file"
fi

rm -f "$temp_file"

echo "Combined data saved to $output_file"
echo "Total MB of cache in previous 24 hours:"
jq . "$output_file"

total_mb="$(jq 'map(.total_size_mb) | add' "$output_file")"
echo "Overall total: $total_mb MB"
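
# A minimal usage sketch, assuming this file is saved under the hypothetical
# name `gh-cache-usage.sh` in the directory two levels above all
# `<org>/<repo>` clones (directory names here are illustrative only):
#
#   cd ~/code                # hypothetical top-level directory of all clones
#   bash gh-cache-usage.sh
#   jq . combined_data.json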
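
# The final `combined_data.json` then holds one object per repository, with
# entries shaped like the following (all values hypothetical, for illustration
# of the schema produced by the `group_by(.path)` step above):
#
#   [
#     {
#       "path": "/home/user/code/ropensci/osmdata/gh-cache-dat.json",
#       "total_size_mb": 512,
#       "count": 3
#     }
#   ]
#
# so repositories generating excessive cache can be read straight off the
# "total_size_mb" field.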