Spaces:
Running
Running
Update github_repo_analyzer.py
Browse files- github_repo_analyzer.py +43 -16
github_repo_analyzer.py
CHANGED
@@ -2,7 +2,7 @@ import os
|
|
2 |
import sys
|
3 |
import tempfile
|
4 |
import shutil
|
5 |
-
from urllib.parse import urlparse
|
6 |
import requests
|
7 |
from github import Github
|
8 |
from git import Repo
|
@@ -45,8 +45,28 @@ def get_repo_info(input_str):
|
|
45 |
|
46 |
def clone_repo(owner, repo_name, temp_dir):
|
47 |
repo_url = f"https://github.com/{owner}/{repo_name}.git"
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
def analyze_code(repo_path):
|
52 |
file_types = defaultdict(int)
|
@@ -519,18 +539,20 @@ def llm_synthesize_findings(client, code_analysis, issues_analysis, pr_analysis)
|
|
519 |
|
520 |
return call_llm(client, prompt, max_tokens=8192)
|
521 |
|
522 |
-
def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis
|
523 |
repo_url = f"https://github.com/{repo_info['owner']}/{repo_info['repo_name']}"
|
524 |
-
|
|
|
525 |
report = f"""# LLM-Assisted Workflow Analysis for {repo_info['owner']}/{repo_info['repo_name']}
|
526 |
|
527 |
## Repository Overview
|
528 |
-
- Owner
|
529 |
-
- Repository
|
530 |
-
- URL
|
531 |
-
- File types
|
532 |
|
533 |
## Code Analysis
|
|
|
534 |
"""
|
535 |
|
536 |
if isinstance(code_analysis.get('llm_analysis'), dict):
|
@@ -541,12 +563,14 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
541 |
report += f"- {pattern}\n"
|
542 |
|
543 |
report += "\n### Best Practices\n"
|
544 |
-
report += "
|
545 |
for practice in code_llm_analysis.get('best_practices', {}).get('followed', []):
|
546 |
report += f"- {practice}\n"
|
547 |
-
report += "\n
|
|
|
548 |
for practice in code_llm_analysis.get('best_practices', {}).get('missing', []):
|
549 |
report += f"- {practice}\n"
|
|
|
550 |
|
551 |
report += "\n### Areas for Improvement\n"
|
552 |
for area in code_llm_analysis.get('areas_for_improvement', []):
|
@@ -554,12 +578,16 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
554 |
|
555 |
report += "\n### Potential Vulnerabilities\n"
|
556 |
for vuln in code_llm_analysis.get('potential_vulnerabilities', []):
|
557 |
-
|
|
|
|
|
558 |
|
559 |
report += "\n### LLM-Assisted Coding Opportunities\n"
|
560 |
for opp in code_llm_analysis.get('llm_opportunities', []):
|
|
|
|
|
561 |
report += f"- **Task:** {opp['task']}\n"
|
562 |
-
report += f" - **File:** `{
|
563 |
report += f" - **Improvement:** {opp['improvement']}\n\n"
|
564 |
else:
|
565 |
report += "No structured code analysis available.\n"
|
@@ -574,7 +602,7 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
574 |
report += "\n### Promising LLM-Assisted Workflows for Issues\n"
|
575 |
for workflow in issues_analysis['summary'].get('promising_workflows', []):
|
576 |
report += f"- **Workflow:** {workflow['workflow']}\n"
|
577 |
-
report += f" - **Example Issue:** [{workflow['applicable_issue']}]({repo_url}/issues/{workflow['applicable_issue']})\n\n"
|
578 |
|
579 |
report += "### Overall Recommendations for Issue Management\n"
|
580 |
for rec in issues_analysis['summary'].get('overall_recommendations', ['No recommendations available.']):
|
@@ -592,7 +620,7 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
592 |
report += "\n### Promising LLM-Assisted Workflows for Pull Requests\n"
|
593 |
for workflow in pr_analysis['summary'].get('promising_workflows', []):
|
594 |
report += f"- **Workflow:** {workflow['workflow']}\n"
|
595 |
-
report += f" - **Example PR:** [{workflow['applicable_pr']}]({repo_url}/pull/{workflow['applicable_pr']})\n\n"
|
596 |
|
597 |
report += "### Overall Recommendations for PR Process\n"
|
598 |
for rec in pr_analysis['summary'].get('overall_recommendations', ['No recommendations available.']):
|
@@ -600,5 +628,4 @@ def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis, fina
|
|
600 |
else:
|
601 |
report += "No structured pull requests analysis available.\n"
|
602 |
|
603 |
-
report += f"\n## Synthesis and Recommendations\n{final_analysis}\n"
|
604 |
return report
|
|
|
2 |
import sys
|
3 |
import tempfile
|
4 |
import shutil
|
5 |
+
from urllib.parse import urlparse, quote
|
6 |
import requests
|
7 |
from github import Github
|
8 |
from git import Repo
|
|
|
45 |
|
46 |
def clone_repo(owner, repo_name, temp_dir):
    """Clone a GitHub repository and describe the resulting checkout.

    Args:
        owner: GitHub account or organisation that owns the repository.
        repo_name: Repository name, without the ``.git`` suffix.
        temp_dir: Local directory to clone into.

    Returns:
        A dict with the keys ``owner``, ``repo_name``, ``full_name``
        (``"owner/repo_name"``), ``repo_url`` (the ``.git`` clone URL),
        ``local_path`` (``temp_dir``), ``default_branch`` and ``git_repo``
        (the ``Repo`` object returned by the clone).

    Raises:
        Whatever ``Repo.clone_from`` raises when the clone fails; this
        function does not intercept clone errors.
    """
    repo_url = f"https://github.com/{owner}/{repo_name}.git"

    # Clone the repository
    repo = Repo.clone_from(repo_url, temp_dir)

    # The branch checked out by a fresh clone is used as the default branch.
    try:
        default_branch = repo.active_branch.name
    except TypeError:
        # active_branch raises TypeError when HEAD is detached. Fall back to
        # the checked-out commit SHA so callers still get a usable ref
        # instead of the whole analysis aborting.
        default_branch = repo.head.commit.hexsha

    return {
        "owner": owner,
        "repo_name": repo_name,
        "full_name": f"{owner}/{repo_name}",
        "repo_url": repo_url,
        "local_path": temp_dir,
        "default_branch": default_branch,
        "git_repo": repo,
    }
|
70 |
|
71 |
def analyze_code(repo_path):
|
72 |
file_types = defaultdict(int)
|
|
|
539 |
|
540 |
return call_llm(client, prompt, max_tokens=8192)
|
541 |
|
542 |
+
def generate_report(repo_info, code_analysis, issues_analysis, pr_analysis):
|
543 |
repo_url = f"https://github.com/{repo_info['owner']}/{repo_info['repo_name']}"
|
544 |
+
default_branch = repo_info.get('default_branch', 'master') # Assume 'master' if not provided
|
545 |
+
|
546 |
report = f"""# LLM-Assisted Workflow Analysis for {repo_info['owner']}/{repo_info['repo_name']}
|
547 |
|
548 |
## Repository Overview
|
549 |
+
- **Owner:** {repo_info['owner']}
|
550 |
+
- **Repository:** {repo_info['repo_name']}
|
551 |
+
- **URL:** [{repo_url}]({repo_url})
|
552 |
+
- **File types:** {', '.join(f"{ext} ({count})" for ext, count in code_analysis.get('file_types', {}).items())}
|
553 |
|
554 |
## Code Analysis
|
555 |
+
|
556 |
"""
|
557 |
|
558 |
if isinstance(code_analysis.get('llm_analysis'), dict):
|
|
|
563 |
report += f"- {pattern}\n"
|
564 |
|
565 |
report += "\n### Best Practices\n"
|
566 |
+
report += "<details><summary>Followed</summary>\n\n"
|
567 |
for practice in code_llm_analysis.get('best_practices', {}).get('followed', []):
|
568 |
report += f"- {practice}\n"
|
569 |
+
report += "</details>\n\n"
|
570 |
+
report += "<details><summary>Missing</summary>\n\n"
|
571 |
for practice in code_llm_analysis.get('best_practices', {}).get('missing', []):
|
572 |
report += f"- {practice}\n"
|
573 |
+
report += "</details>\n"
|
574 |
|
575 |
report += "\n### Areas for Improvement\n"
|
576 |
for area in code_llm_analysis.get('areas_for_improvement', []):
|
|
|
578 |
|
579 |
report += "\n### Potential Vulnerabilities\n"
|
580 |
for vuln in code_llm_analysis.get('potential_vulnerabilities', []):
|
581 |
+
relative_path = os.path.relpath(vuln['file_path'], repo_info['local_path'])
|
582 |
+
file_url = f"{repo_url}/blob/{default_branch}/{quote(relative_path)}"
|
583 |
+
report += f"- [{vuln['description']}]({file_url}) (Severity: {vuln['severity']})\n"
|
584 |
|
585 |
report += "\n### LLM-Assisted Coding Opportunities\n"
|
586 |
for opp in code_llm_analysis.get('llm_opportunities', []):
|
587 |
+
relative_path = os.path.relpath(opp['file_path'], repo_info['local_path'])
|
588 |
+
file_url = f"{repo_url}/blob/{default_branch}/{quote(relative_path)}"
|
589 |
report += f"- **Task:** {opp['task']}\n"
|
590 |
+
report += f" - **File:** [`{relative_path}`]({file_url})\n"
|
591 |
report += f" - **Improvement:** {opp['improvement']}\n\n"
|
592 |
else:
|
593 |
report += "No structured code analysis available.\n"
|
|
|
602 |
report += "\n### Promising LLM-Assisted Workflows for Issues\n"
|
603 |
for workflow in issues_analysis['summary'].get('promising_workflows', []):
|
604 |
report += f"- **Workflow:** {workflow['workflow']}\n"
|
605 |
+
report += f" - **Example Issue:** [#{workflow['applicable_issue']}]({repo_url}/issues/{workflow['applicable_issue']})\n\n"
|
606 |
|
607 |
report += "### Overall Recommendations for Issue Management\n"
|
608 |
for rec in issues_analysis['summary'].get('overall_recommendations', ['No recommendations available.']):
|
|
|
620 |
report += "\n### Promising LLM-Assisted Workflows for Pull Requests\n"
|
621 |
for workflow in pr_analysis['summary'].get('promising_workflows', []):
|
622 |
report += f"- **Workflow:** {workflow['workflow']}\n"
|
623 |
+
report += f" - **Example PR:** [#{workflow['applicable_pr']}]({repo_url}/pull/{workflow['applicable_pr']})\n\n"
|
624 |
|
625 |
report += "### Overall Recommendations for PR Process\n"
|
626 |
for rec in pr_analysis['summary'].get('overall_recommendations', ['No recommendations available.']):
|
|
|
628 |
else:
|
629 |
report += "No structured pull requests analysis available.\n"
|
630 |
|
|
|
631 |
return report
|