Spaces:
Sleeping
Sleeping
File size: 3,547 Bytes
188e720 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import argparse
from pathlib import Path
import json
import logging
import sys
import time
from code_summarizer import (
clone_repo,
summarize_repo,
upload_summary_to_firebase,
get_summaries_by_repo,
is_firestore_available
)
# Import device/model status separately if needed for logging
from code_summarizer.summarizer import device as summarizer_device, MODEL_LOADED as SUMMARIZER_LOADED
# Basic logging config for the CLI app
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - [CLI] %(message)s')
log = logging.getLogger(__name__)
# Working directory the target repository is cloned into (relative to CWD).
REPO_CLONE_DIR = "cloned_repo_cli"
# Local backup location: outputs/summaries.json is written when save_local=True.
OUTPUT_DIR = Path("outputs")
OUTPUT_FILE = OUTPUT_DIR / "summaries.json"
def run_pipeline(repo_url: str, skip_existing: bool = False, save_local: bool = True):
    """Clone a repository, summarize its functions, and persist the results.

    Args:
        repo_url: HTTPS URL of the (public) repository to process.
        skip_existing: When True and Firebase is reachable, return early if
            summaries for this repo already exist in Firestore.
        save_local: When True, also write the summaries to ``OUTPUT_FILE``
            as a local JSON backup.

    Exits the process with status 1 when the summarizer model is not loaded
    or cloning fails; returns early (no error) when there is nothing to do.
    """
    start_time = time.time()
    # Lazy %-style args: the message is only formatted if the record is emitted.
    log.info("Pipeline starting for: %s", repo_url)
    if not SUMMARIZER_LOADED:
        log.error("Summarizer Model Not Loaded. Exiting.")
        sys.exit(1)
    firestore_ready = is_firestore_available()
    if not firestore_ready:
        log.warning("Firebase is not available. Uploads/Checks will be skipped.")
    if skip_existing and firestore_ready:
        log.info("Checking for existing summaries...")
        if get_summaries_by_repo(repo_url):
            log.warning("Skipping. Found existing summaries in Firebase.")
            return
    log.info("Cloning repository...")
    clone_dir_path = Path(REPO_CLONE_DIR)
    if not clone_repo(repo_url, str(clone_dir_path)):
        log.error("Repo cloning failed. Exiting.")
        sys.exit(1)
    log.info("Running summarization (device: %s)...", summarizer_device)
    summaries = summarize_repo(clone_dir_path, repo_url)
    if not summaries:
        log.warning("No functions found or summarization failed.")
        return
    log.info("Summarization complete. Found %d functions.", len(summaries))
    if firestore_ready:
        _upload_summaries(summaries)
    else:
        log.info("Skipping Firebase upload.")
    if save_local:
        _save_local_backup(summaries)
    duration = time.time() - start_time
    log.info("✅ Pipeline completed in %.2f seconds.", duration)


def _upload_summaries(summaries):
    """Upload each summary to Firebase, logging progress every 100 items."""
    log.info("Uploading %d summaries to Firebase...", len(summaries))
    upload_count = 0
    for i, summary in enumerate(summaries):
        upload_summary_to_firebase(summary)
        upload_count += 1
        if (i + 1) % 100 == 0:
            log.info(" Uploaded %d/%d...", i + 1, len(summaries))
    log.info("Finished uploading %d summaries.", upload_count)


def _save_local_backup(summaries):
    """Write summaries to OUTPUT_FILE as pretty-printed JSON; logs, never raises."""
    log.info("Saving summaries locally to %s...", OUTPUT_FILE)
    try:
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        with open(OUTPUT_FILE, "w", encoding='utf-8') as f:
            # default=str stringifies any non-JSON-serializable values (e.g.
            # Paths, datetimes) instead of failing the whole dump — deliberate
            # best-effort behavior for a backup file.
            json.dump(summaries, f, indent=2, default=str)
        log.info("Saved local backup to %s", OUTPUT_FILE)
    except Exception as e:
        # Best-effort: a failed local backup must not abort the pipeline
        # (the Firebase upload, if any, has already completed).
        log.error("Failed to save local backup: %s", e, exc_info=True)
if __name__ == "__main__":
    # CLI entry point: parse flags and hand off to run_pipeline.
    cli = argparse.ArgumentParser(
        description="Code Summarizer CLI",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument("--url", required=True, help="HTTPS URL of the public GitHub repository.")
    cli.add_argument("--skip_existing", action="store_true", help="Skip if repo already summarized in Firebase.")
    cli.add_argument("--no_save", action="store_true", help="Disable saving local summaries.json.")
    opts = cli.parse_args()
    # --no_save is inverted so run_pipeline keeps a positively-named flag.
    run_pipeline(repo_url=opts.url, skip_existing=opts.skip_existing, save_local=not opts.no_save)