topic-generator / scripts /github_analyzer.py
Namgyu-Youn's picture
Upload 7 files
545ce24 verified
raw
history blame
3.62 kB
from typing import Dict, List, Optional, Any
import aiohttp
from urllib.parse import urlparse
from .analyzer import TopicAnalyzer
from .error_handler import ErrorHandler
class GitHubAnalyzer:
CORE_FILES = [
'README.md',
'requirements.txt',
'pyproject.toml',
'package.json',
'main.py',
'app.py',
'train.py'
]
def __init__(self):
self.base_url = "https://raw.githubusercontent.com"
self.topic_analyzer = TopicAnalyzer()
self.error_handler = ErrorHandler()
def parse_github_url(self, url: str) -> tuple[str, str, str]:
"""Parse GitHub URL into components."""
try:
parsed = urlparse(url)
path_parts = parsed.path.strip("/").split("/")
if len(path_parts) < 2:
return self.error_handler.handle_github_url_error(
url,
"URL must contain owner and repository"
)
owner = path_parts[0]
repo = path_parts[1]
branch = "main" # default branch
return owner, repo, branch
except Exception as e:
return self.error_handler.handle_github_url_error(url, str(e))
async def _fetch_file(self, session: aiohttp.ClientSession, url: str) -> Optional[str]:
"""Fetch a single file content."""
try:
async with session.get(url) as response:
if response.status == 200:
return await response.text()
return self.error_handler.handle_file_fetch_error(
url,
f"HTTP {response.status}"
)
except Exception as e:
return self.error_handler.handle_file_fetch_error(url, str(e))
async def analyze_repository(
self,
repo_url: str,
category: str,
subcategory: str
) -> Dict[str, Any]:
"""Analyze repository and generate comprehensive topics."""
try:
files_content = await self._fetch_core_files(repo_url)
if not files_content:
return self.error_handler.handle_file_fetch_error(
repo_url,
"No core files found"
)
# Analyze README content
readme_topics = []
if 'README.md' in files_content:
readme_topics = await self.topic_analyzer.generate_topics(
files_content['README.md'],
category,
subcategory
)
# Get dependencies
dependencies = await self._analyze_dependencies(files_content)
# Analyze Python files content
code_content = ""
for file in ['main.py', 'app.py', 'train.py']:
if file in files_content:
code_content += files_content[file] + "\n"
code_topics = []
if code_content:
code_topics = await self.topic_analyzer.generate_topics(
code_content,
category,
subcategory
)
return self.error_handler.success_response({
"readme_topics": readme_topics,
"code_topics": code_topics,
"dependencies": dependencies
})
except Exception as e:
return self.error_handler.handle_topic_analysis_error(
str(e),
{"repo_url": repo_url, "category": category, "subcategory": subcategory}
)