Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -24,7 +24,7 @@ if 'feedback' not in st.session_state:
|
|
24 |
st.session_state.feedback = {}
|
25 |
|
26 |
# Define subset size
|
27 |
-
SUBSET_SIZE =
|
28 |
|
29 |
class TextDataset(Dataset):
|
30 |
def __init__(self, texts: List[str], tokenizer, max_length: int = 512):
|
@@ -176,8 +176,6 @@ st.info(f"Running with a subset of {SUBSET_SIZE} repositories for testing purpos
|
|
176 |
data = precompute_embeddings(data, model, tokenizer)
|
177 |
|
178 |
|
179 |
-
# [Previous imports and code remain the same until the UI section]
|
180 |
-
|
181 |
# Main App Interface
|
182 |
st.title("Repository Recommender System π")
|
183 |
st.caption("Testing Version - Running on subset of data")
|
@@ -199,7 +197,7 @@ with col2:
|
|
199 |
if search_button and user_query.strip():
|
200 |
with st.spinner("Finding relevant repositories..."):
|
201 |
# Generate query embedding and get recommendations
|
202 |
-
query_embedding = generate_query_embedding(model, tokenizer, user_query)
|
203 |
recommendations = find_similar_repos(query_embedding, data, top_n)
|
204 |
|
205 |
# Save to history
|
@@ -212,34 +210,61 @@ if search_button and user_query.strip():
|
|
212 |
# Display recommendations
|
213 |
st.markdown("### 🎯 Top Recommendations")
|
214 |
for idx, row in recommendations.iterrows():
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
|
|
|
|
240 |
if row['docstring']:
|
241 |
-
|
242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
|
244 |
# Footer
|
245 |
st.markdown("---")
|
|
|
24 |
st.session_state.feedback = {}
|
25 |
|
26 |
# Define subset size
|
27 |
+
SUBSET_SIZE = 500 # Starting with 500 items for quick testing
|
28 |
|
29 |
class TextDataset(Dataset):
|
30 |
def __init__(self, texts: List[str], tokenizer, max_length: int = 512):
|
|
|
176 |
data = precompute_embeddings(data, model, tokenizer)
|
177 |
|
178 |
|
|
|
|
|
179 |
# Main App Interface
|
180 |
st.title("Repository Recommender System π")
|
181 |
st.caption("Testing Version - Running on subset of data")
|
|
|
197 |
if search_button and user_query.strip():
|
198 |
with st.spinner("Finding relevant repositories..."):
|
199 |
# Generate query embedding and get recommendations
|
200 |
+
query_embedding = generate_query_embedding(model, tokenizer, user_query)
|
201 |
recommendations = find_similar_repos(query_embedding, data, top_n)
|
202 |
|
203 |
# Save to history
|
|
|
210 |
# Display recommendations
|
211 |
st.markdown("### 🎯 Top Recommendations")
|
212 |
for idx, row in recommendations.iterrows():
|
213 |
+
st.markdown(f"#### Repository {idx + 1}: {row['repo']}")
|
214 |
+
|
215 |
+
# Repository details in columns
|
216 |
+
col1, col2 = st.columns([2, 1])
|
217 |
+
with col1:
|
218 |
+
st.markdown(f"**URL:** [View Repository]({row['url']})")
|
219 |
+
st.markdown(f"**Path:** `{row['path']}`")
|
220 |
+
with col2:
|
221 |
+
st.metric("Match Score", f"{row['similarity']:.2%}")
|
222 |
+
|
223 |
+
# Feedback buttons in columns
|
224 |
+
feedback_col1, feedback_col2 = st.columns([1, 4])
|
225 |
+
with feedback_col1:
|
226 |
+
if st.button("👍", key=f"like_{idx}"):
|
227 |
+
save_feedback(row['repo'], 'likes')
|
228 |
+
st.success("Thanks for your feedback!")
|
229 |
+
if st.button("👎", key=f"dislike_{idx}"):
|
230 |
+
save_feedback(row['repo'], 'dislikes')
|
231 |
+
st.success("Thanks for your feedback!")
|
232 |
+
|
233 |
+
# Case Study and Documentation in tabs instead of nested expanders
|
234 |
+
tab1, tab2 = st.tabs(["π Case Study Brief", "π Documentation"])
|
235 |
+
|
236 |
+
with tab1:
|
237 |
+
st.markdown(generate_case_study(row))
|
238 |
+
|
239 |
+
with tab2:
|
240 |
if row['docstring']:
|
241 |
+
st.markdown(row['docstring'])
|
242 |
+
else:
|
243 |
+
st.info("No documentation available")
|
244 |
+
|
245 |
+
st.markdown("---") # Separator between repositories
|
246 |
+
|
247 |
+
# Sidebar for History and Stats
|
248 |
+
with st.sidebar:
|
249 |
+
st.header("π Search History")
|
250 |
+
if st.session_state.history:
|
251 |
+
for idx, item in enumerate(reversed(st.session_state.history[-5:])):
|
252 |
+
st.markdown(f"**Search {len(st.session_state.history)-idx}**")
|
253 |
+
st.markdown(f"Query: _{item['query'][:30]}..._")
|
254 |
+
st.caption(f"Time: {item['timestamp']}")
|
255 |
+
st.caption(f"Results: {len(item['results'])} repositories")
|
256 |
+
if st.button("Rerun this search", key=f"rerun_{idx}"):
|
257 |
+
st.session_state.rerun_query = item['query']
|
258 |
+
st.markdown("---")
|
259 |
+
else:
|
260 |
+
st.write("No search history yet")
|
261 |
+
|
262 |
+
st.header("π Usage Statistics")
|
263 |
+
st.write(f"Total Searches: {len(st.session_state.history)}")
|
264 |
+
if st.session_state.feedback:
|
265 |
+
feedback_df = pd.DataFrame(st.session_state.feedback).T
|
266 |
+
feedback_df['Total'] = feedback_df['likes'] + feedback_df['dislikes']
|
267 |
+
st.bar_chart(feedback_df[['likes', 'dislikes']])
|
268 |
|
269 |
# Footer
|
270 |
st.markdown("---")
|