File size: 3,547 Bytes
188e720
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import argparse
from pathlib import Path
import json
import logging
import sys
import time

from code_summarizer import (
    clone_repo,
    summarize_repo,
    upload_summary_to_firebase,
    get_summaries_by_repo,
    is_firestore_available
)
# Import device/model status separately if needed for logging
from code_summarizer.summarizer import device as summarizer_device, MODEL_LOADED as SUMMARIZER_LOADED

# CLI-wide logging: INFO level, every record tagged with "[CLI]".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - [CLI] %(message)s',
    level=logging.INFO,
)
log = logging.getLogger(__name__)

# Directory name the target repository is cloned into.
REPO_CLONE_DIR = "cloned_repo_cli"
# Local backup location for the generated summaries.
OUTPUT_DIR = Path("outputs")
OUTPUT_FILE = OUTPUT_DIR.joinpath("summaries.json")

def _upload_summaries(summaries) -> None:
    """Send each summary to Firebase, logging progress every 100 items."""
    total = len(summaries)
    log.info(f"Uploading {total} summaries to Firebase...")
    for done, summary in enumerate(summaries, start=1):
        # NOTE(review): the return value of upload_summary_to_firebase is not
        # inspected, so the final count reflects attempted uploads - confirm
        # whether the helper reports failures via its return value.
        upload_summary_to_firebase(summary)
        if done % 100 == 0:
            log.info(f"  Uploaded {done}/{total}...")
    log.info(f"Finished uploading {total} summaries.")


def _save_local_backup(summaries) -> None:
    """Write the summaries as indented JSON to OUTPUT_FILE (best effort)."""
    log.info(f"Saving summaries locally to {OUTPUT_FILE}...")
    try:
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        with open(OUTPUT_FILE, "w", encoding='utf-8') as f:
            # default=str stringifies any value json cannot encode natively.
            json.dump(summaries, f, indent=2, default=str)
        log.info(f"Saved local backup to {OUTPUT_FILE}")
    except Exception as e:
        # Deliberately broad: a failed backup is logged with its traceback
        # but must not abort the pipeline.
        log.error(f"Failed to save local backup: {e}", exc_info=True)


def run_pipeline(repo_url: str, skip_existing: bool = False, save_local: bool = True) -> None:
    """Clone a repository, summarize it, and store the summaries.

    Steps, in order:
      1. Abort if the summarizer model is not loaded.
      2. If ``skip_existing`` is set and Firestore is available, return early
         when summaries for ``repo_url`` already exist.
      3. Clone the repository into ``REPO_CLONE_DIR``.
      4. Run ``summarize_repo`` on the clone; return early if it yields
         nothing.
      5. Upload every summary to Firebase when Firestore is available.
      6. Optionally write a JSON backup to ``OUTPUT_FILE``.

    Args:
        repo_url: URL of the repository to clone and summarize.
        skip_existing: Skip the run if Firebase already holds summaries for
            this repository (only checked when Firestore is available).
        save_local: Write the summaries to ``OUTPUT_FILE`` as JSON.

    Raises:
        SystemExit: With exit code 1 when the summarizer model is not loaded
            or the repository could not be cloned.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    started = time.perf_counter()
    log.info(f"Pipeline starting for: {repo_url}")

    if not SUMMARIZER_LOADED:
        log.error("Summarizer Model Not Loaded. Exiting.")
        sys.exit(1)

    firestore_ready = is_firestore_available()
    if not firestore_ready:
        log.warning("Firebase is not available. Uploads/Checks will be skipped.")

    if skip_existing and firestore_ready:
        log.info("Checking for existing summaries...")
        if get_summaries_by_repo(repo_url):
            log.warning("Skipping. Found existing summaries in Firebase.")
            return

    log.info("Cloning repository...")
    clone_dir_path = Path(REPO_CLONE_DIR)
    if not clone_repo(repo_url, str(clone_dir_path)):
        log.error("Repo cloning failed. Exiting.")
        sys.exit(1)

    log.info(f"Running summarization (device: {summarizer_device})...")
    summaries = summarize_repo(clone_dir_path, repo_url)
    if not summaries:
        log.warning("No functions found or summarization failed.")
        return

    log.info(f"Summarization complete. Found {len(summaries)} functions.")

    if firestore_ready:
        _upload_summaries(summaries)
    else:
        log.info("Skipping Firebase upload.")

    if save_local:
        _save_local_backup(summaries)

    duration = time.perf_counter() - started
    log.info(f"✅ Pipeline completed in {duration:.2f} seconds.")


if __name__ == "__main__":
    # Command-line entry point: parse the flags and hand them to run_pipeline.
    cli = argparse.ArgumentParser(
        description="Code Summarizer CLI",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "--url",
        required=True,
        help="HTTPS URL of the public GitHub repository.",
    )
    cli.add_argument(
        "--skip_existing",
        action="store_true",
        help="Skip if repo already summarized in Firebase.",
    )
    cli.add_argument(
        "--no_save",
        action="store_true",
        help="Disable saving local summaries.json.",
    )
    options = cli.parse_args()

    # --no_save is an opt-out flag, so invert it for the save_local parameter.
    keep_local_copy = not options.no_save
    run_pipeline(
        repo_url=options.url,
        skip_existing=options.skip_existing,
        save_local=keep_local_copy,
    )