anantdayanithi committed on
Commit
06fe897
·
verified ·
1 Parent(s): c56459e

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +347 -0
main.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
import requests
import time
import json
import google.generativeai as genai
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI
from dotenv import load_dotenv  # NOTE(review): imported but load_dotenv() is never called in this file — confirm whether .env loading was intended

# Configure the Streamlit page (title, icon, wide layout).
st.set_page_config(
    page_title="GitHub Repository Analyzer",
    page_icon="πŸ”",
    layout="wide"
)

# Inject custom CSS: card styling plus a taller job-description text area.
st.markdown("""
<style>
.metric-card {
    background-color: #f0f2f6;
    padding: 20px;
    border-radius: 10px;
    margin: 10px 0;
}
.repo-card {
    background-color: white;
    padding: 20px;
    border-radius: 10px;
    margin: 10px 0;
    border: 1px solid #e6e6e6;
}
.stTextArea textarea {
    height: 200px;
}
</style>
""", unsafe_allow_html=True)

# Initialize session state so values persist across Streamlit reruns.
if 'analysis_complete' not in st.session_state:
    # NOTE(review): set here but never updated elsewhere in this file — verify it is still needed.
    st.session_state.analysis_complete = False
if 'github_token' not in st.session_state:
    st.session_state.github_token = ""
if 'gemini_key' not in st.session_state:
    st.session_state.gemini_key = ""
def initialize_api(github_token, gemini_key):
    """Build GitHub request headers and a Gemini LLM client.

    Returns a ``(headers, llm)`` pair on success, or ``(None, None)``
    after reporting the failure in the Streamlit UI.
    """
    try:
        headers = {
            "Authorization": f"token {github_token}",
            "Accept": "application/vnd.github.v3+json",
        }

        # Wire up the Gemini client.
        genai.configure(api_key=gemini_key)
        llm = GoogleGenerativeAI(
            model="gemini-1.5-flash",
            temperature=0.2,
            google_api_key=gemini_key,
        )

        # Smoke-test the credentials with a throwaway prompt; a bad key
        # raises here and is reported below.
        llm.invoke("Test")
    except Exception as e:
        st.error(f"Error initializing APIs: {str(e)}")
        st.error("Please ensure your API keys are correct and try again.")
        return None, None

    return headers, llm
def get_github_repos(username, headers):
    """Fetch all public repositories for *username* from the GitHub API.

    Fixes two defects of the original: no request timeout (a stalled
    connection hung the app forever) and no pagination — the API's
    default page size is 30, so larger accounts were silently truncated.

    Returns a list of repository dicts, or [] on failure (an error is
    shown in the UI).
    """
    url = f"https://api.github.com/users/{username}/repos"
    repos = []
    page = 1

    while True:
        response = requests.get(
            url,
            headers=headers,
            params={"per_page": 100, "page": page},
            timeout=30,
        )
        if response.status_code != 200:
            st.error(f"Failed to fetch repositories. Status code: {response.status_code}")
            return []

        batch = response.json()
        repos.extend(batch)
        # A short page means we have reached the last one.
        if len(batch) < 100:
            return repos
        page += 1
def get_repo_details(username, repo_name, headers):
    """Fetch README text, latest commit messages, top-level files and languages.

    Returns a 4-tuple ``(readme_content, commit_messages, file_structure,
    languages_used)``; any piece that cannot be fetched is left empty
    rather than raising.

    Improvements over the original: every request carries a timeout (a
    stalled connection previously hung the app), and the README
    ``download_url`` fetch now sends the auth headers and checks the HTTP
    status — before, an error page could be stored as the README text.
    """
    base = f"https://api.github.com/repos/{username}/{repo_name}"
    readme_url = f"{base}/readme"
    commits_url = f"{base}/commits"
    contents_url = f"{base}/contents"
    languages_url = f"{base}/languages"

    readme_content = ""
    commit_messages = []
    file_structure = []
    languages_used = []

    with st.spinner(f"Fetching details for {repo_name}..."):
        # Fetch README metadata, then its raw content.
        readme_response = requests.get(readme_url, headers=headers, timeout=30)
        if readme_response.status_code == 200:
            download = requests.get(readme_response.json()['download_url'],
                                    headers=headers, timeout=30)
            if download.status_code == 200:
                readme_content = download.text

        # Fetch latest 5 commit messages.
        commit_response = requests.get(commits_url, headers=headers, timeout=30)
        if commit_response.status_code == 200:
            commit_messages = [commit['commit']['message'] for commit in commit_response.json()[:5]]

        # Fetch top-level file structure ('entry' avoids shadowing builtin 'file').
        content_response = requests.get(contents_url, headers=headers, timeout=30)
        if content_response.status_code == 200:
            file_structure = [entry['name'] for entry in content_response.json()]

        # Fetch languages used.
        lang_response = requests.get(languages_url, headers=headers, timeout=30)
        if lang_response.status_code == 200:
            languages_used = list(lang_response.json().keys())

    return readme_content, commit_messages, file_structure, languages_used
def analyze_repo_and_jd_match(readme, file_structure, commits, languages, jd, llm):
    """Ask the LLM to profile a repository and score it against the JD.

    Returns the parsed JSON dict produced by the model, or a neutral
    fallback dict if the call or JSON extraction fails (the error is
    shown in the UI).

    Fix: a response with no ``{``/``}`` previously produced a confusing
    slice-then-parse error; it now fails explicitly before the fallback.
    """
    prompt_template = PromptTemplate(
        input_variables=["readme", "files", "commits", "languages", "jd"],
        template="""
You are an AI technical recruiter. Analyze the following GitHub project details and job description:

Job Description:
{jd}

Repository Details:
README: {readme}
File Structure: {files}
Commit Messages: {commits}
Languages: {languages}

Provide output as structured JSON:
{{
"languages": ["list of languages"],
"tech_stack": ["list of frameworks & libraries"],
"algorithms": ["list of key algorithms used"],
"complexity": "low/medium/high",
"commit_activity": "active/moderate/inactive",
"jd_match_score": "1-100",
"jd_match_reasons": ["list of reasons why this repository matches or doesn't match the JD"]
}}
"""
    )

    try:
        response = llm.invoke(prompt_template.format(
            readme=readme,
            files=", ".join(file_structure),
            commits=", ".join(commits),
            languages=", ".join(languages),
            jd=jd
        ))

        # Extract the first {...} span; models often wrap JSON in prose or fences.
        json_start = response.find("{")
        json_end = response.rfind("}") + 1
        if json_start == -1 or json_end <= json_start:
            raise ValueError("No JSON object found in model response")

        return json.loads(response[json_start:json_end].strip())

    except Exception as e:
        st.error(f"Error analyzing repository: {e}")
        # Neutral fallback keeps downstream scoring working.
        return {
            "languages": [],
            "tech_stack": [],
            "algorithms": [],
            "complexity": "unknown",
            "commit_activity": "unknown",
            "jd_match_score": 0,
            "jd_match_reasons": []
        }
def calculate_repo_score(analysis_data):
    """Score a repository (0-100) from its LLM analysis and JD match.

    Base score (max 100): languages (10) + tech stack (15) + algorithms
    (15) + complexity (30) + commit activity (30). The final score is a
    weighted average: 60% base score, 40% JD match score.

    Robustness fixes: missing keys no longer raise KeyError, and a
    non-numeric ``jd_match_score`` (the prompt even advertises the
    literal string "1-100", so the model may return one) no longer
    crashes with ValueError — it is treated as 0.
    """
    base_score = 0

    # Score based on number of languages (max 10 points).
    base_score += min(len(analysis_data.get('languages', [])) * 2, 10)

    # Score based on tech stack (max 15 points).
    base_score += min(len(analysis_data.get('tech_stack', [])) * 3, 15)

    # Score based on algorithms (max 15 points).
    base_score += min(len(analysis_data.get('algorithms', [])) * 3, 15)

    # Score based on complexity (max 30 points).
    complexity_scores = {"low": 10, "medium": 20, "high": 30, "unknown": 0}
    base_score += complexity_scores.get(str(analysis_data.get('complexity', 'unknown')).lower(), 0)

    # Score based on commit activity (max 30 points).
    activity_scores = {"inactive": 10, "moderate": 20, "active": 30, "unknown": 0}
    base_score += activity_scores.get(str(analysis_data.get('commit_activity', 'unknown')).lower(), 0)

    # JD match score may arrive as an int, a numeric string, or garbage.
    try:
        jd_match_score = float(analysis_data.get('jd_match_score', 0))
    except (TypeError, ValueError):
        jd_match_score = 0.0

    # Final score is a weighted average of base score and JD match score.
    final_score = (base_score * 0.6) + (jd_match_score * 0.4)

    return round(final_score)
def evaluate_candidate(total_score, num_repos):
    """Map the average repository score onto a suitability label."""
    if not num_repos:
        return "Unable to evaluate - no repositories found"

    average = total_score / num_repos
    # Thresholds are checked from highest to lowest; first match wins.
    for threshold, label in ((75, "Highly Suitable"),
                             (50, "Moderately Suitable"),
                             (25, "Potentially Suitable")):
        if average >= threshold:
            return label
    return "Not Suitable"
def display_repo_analysis(repo_name, analysis_data, repo_score):
    """Render a single repository's analysis inside an expander."""
    def _fmt(items):
        # Comma-join, falling back to a placeholder for an empty list.
        return ", ".join(items) if items else "None detected"

    with st.expander(f"πŸ“ {repo_name} - Score: {repo_score}/100", expanded=True):
        left, right = st.columns(2)

        with left:
            st.markdown("### πŸ›  Technical Details")
            st.write("**Languages:**", ", ".join(analysis_data['languages']))
            st.write("**Tech Stack:**", _fmt(analysis_data['tech_stack']))
            st.write("**Algorithms:**", _fmt(analysis_data['algorithms']))

        with right:
            st.markdown("### πŸ“Š Metrics")
            st.write("**Complexity:**", analysis_data['complexity'].capitalize())
            st.write("**Commit Activity:**", analysis_data['commit_activity'].capitalize())
            st.write("**JD Match Score:**", f"{analysis_data.get('jd_match_score', 0)}/100")
            st.progress(repo_score / 100)

        # Only show the match-reasons section when the model supplied reasons.
        if analysis_data.get('jd_match_reasons'):
            st.markdown("### 🎯 JD Match Analysis")
            for reason in analysis_data['jd_match_reasons']:
                st.write(f"- {reason}")
def analyze_github_repos(username, headers, llm, jd):
    """Analyze every repository of *username* against the job description.

    Returns ``(results, total_score)`` where *results* is a list of
    ``(repo_name, analysis_dict, score)`` tuples.

    Fix: on failure the original returned a bare ``[]`` while the caller
    unpacks two values (``repo_analysis, total_score = ...``), which
    raised "not enough values to unpack". It now returns ``([], 0)``.
    """
    repos = get_github_repos(username, headers)
    if not repos:
        st.error("No repositories found or failed to fetch repositories.")
        return [], 0

    results = []
    total_score = 0
    progress_bar = st.progress(0)

    for idx, repo in enumerate(repos):
        repo_name = repo['name']
        with st.spinner(f"Analyzing {repo_name}..."):
            readme, commits, file_structure, languages = get_repo_details(username, repo_name, headers)
            analysis_data = analyze_repo_and_jd_match(readme, file_structure, commits, languages, jd, llm)
            repo_score = calculate_repo_score(analysis_data)
            total_score += repo_score
            results.append((repo_name, analysis_data, repo_score))

        progress_bar.progress((idx + 1) / len(repos))
        time.sleep(1)  # crude pacing to avoid hammering the GitHub/Gemini APIs

    progress_bar.empty()
    return results, total_score
def main():
    """Streamlit entry point: collect inputs, run the analysis, render results.

    Fixes: (1) the export used a second ``st.button`` nested inside the
    "Analyze Repositories" button branch — clicking it triggers a rerun
    in which the outer button is False, so the download could never
    appear; ``st.download_button`` is now rendered directly. (2) pressing
    "Analyze Repositories" with missing inputs previously did nothing
    silently; it now shows a warning.
    """
    st.title("πŸ” GitHub Repository Analyzer")
    st.markdown("""
This tool analyzes GitHub repositories to evaluate technical capabilities and project quality.
Please provide the required information below to begin the analysis.
""")

    # API Keys input
    with st.expander("πŸ”‘ API Configuration", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            github_token = st.text_input("GitHub Token", type="password",
                                         value=st.session_state.get('github_token', ''))
        with col2:
            gemini_key = st.text_input("Google Gemini API Key", type="password",
                                       value=st.session_state.get('gemini_key', ''))

    # Job Description input
    st.subheader("πŸ“ Job Description")
    jd = st.text_area("Paste the job description here", height=200)

    # GitHub username input
    username = st.text_input("πŸ‘€ Enter GitHub Username")

    # Save API keys to session state so they survive reruns.
    if github_token:
        st.session_state.github_token = github_token
    if gemini_key:
        st.session_state.gemini_key = gemini_key

    if not st.button("Analyze Repositories"):
        return

    if not (username and jd and github_token and gemini_key):
        st.warning("Please provide the GitHub token, Gemini API key, username and job description first.")
        return

    headers, llm = initialize_api(github_token, gemini_key)
    if not (headers and llm):
        st.error("Failed to initialize APIs. Please check your API keys and try again.")
        return

    with st.spinner("Analyzing repositories..."):
        repo_analysis, total_score = analyze_github_repos(username, headers, llm, jd)

    if not repo_analysis:
        st.error("No repositories found or analysis failed.")
        return

    num_repos = len(repo_analysis)

    # Display overall summary
    st.header("πŸ“Š Analysis Summary")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Repositories", num_repos)
    with col2:
        avg_score = round(total_score / num_repos if num_repos > 0 else 0)
        st.metric("Average Repository Score", f"{avg_score}/100")
    with col3:
        suitability = evaluate_candidate(total_score, num_repos)
        st.metric("Candidate Suitability", suitability)

    # Display individual repository analysis, highest score first.
    st.header("πŸ“ Repository Details")
    sorted_analysis = sorted(repo_analysis, key=lambda x: x[2], reverse=True)
    for repo_name, analysis_data, repo_score in sorted_analysis:
        display_repo_analysis(repo_name, analysis_data, repo_score)

    # Export option: render the download button directly (see docstring fix #1).
    export_data = {
        "username": username,
        "total_repos": num_repos,
        "average_score": avg_score,
        "suitability": suitability,
        "repositories": [
            {
                "name": repo_name,
                "score": repo_score,
                "analysis": analysis_data
            }
            for repo_name, analysis_data, repo_score in sorted_analysis
        ]
    }
    st.download_button(
        "Download Analysis Report",
        data=json.dumps(export_data, indent=2),
        file_name=f"github_analysis_{username}.json",
        mime="application/json"
    )
# Run the Streamlit app when this module is executed directly.
if __name__ == "__main__":
    main()