aryan79 committed on
Commit
09d0e26
·
verified ·
1 Parent(s): 3cc36a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -40
app.py CHANGED
@@ -2,8 +2,6 @@ import requests
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import gradio as gr
5
- from sklearn.feature_extraction.text import TfidfVectorizer
6
- from sklearn.metrics.pairwise import cosine_similarity
7
  import os
8
  from groq import Groq
9
  import creds # Assuming creds.py holds your API key as creds.api_key
@@ -39,40 +37,68 @@ for course_card in soup.find_all('header', class_='course-card__img-container'):
39
  df = pd.DataFrame(courses)
40
 
41
  # Step 3: Initialize the Groq client and set the API key
42
- client = Groq(api_key=creds.api_key)
43
 
44
  def search_courses(query):
45
  try:
46
- # Step 4: Preprocessing query and course titles for TF-IDF
47
- course_titles = df['title'].tolist()
48
- course_titles.append(query) # Add the query to the list of titles
49
 
50
- # Using TF-IDF to vectorize the course titles and query
51
- tfidf_vectorizer = TfidfVectorizer(stop_words='english')
52
- tfidf_matrix = tfidf_vectorizer.fit_transform(course_titles)
 
 
 
 
 
 
 
 
 
53
 
54
- # Compute cosine similarity between the query and course titles
55
- cosine_similarities = cosine_similarity(tfidf_matrix[-1:], tfidf_matrix[:-1]).flatten()
 
 
 
 
 
 
 
 
56
 
57
- # Get the top 10 relevant courses based on cosine similarity
58
- top_indices = cosine_similarities.argsort()[-10:][::-1]
59
-
60
- # Step 5: Build results
61
  results = []
62
- for index in top_indices:
63
- relevance = cosine_similarities[index]
64
- if relevance >= 0.5: # Only consider courses with at least 50% relevance
65
- course = df.iloc[index]
66
- results.append({
67
- 'title': course['title'],
68
- 'image_url': course['image_url'],
69
- 'course_link': course['course_link'],
70
- 'score': round(relevance * 100, 2) # Show relevance as percentage
71
- })
72
 
73
- return results if results else []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  except Exception as e:
 
76
  return []
77
 
78
  def gradio_search(query):
@@ -84,7 +110,7 @@ def gradio_search(query):
84
  course_title = item['title']
85
  course_image = item['image_url']
86
  course_link = item['course_link']
87
- relevance_score = item['score']
88
 
89
  html_output += f'''
90
  <div class="course-card">
@@ -100,18 +126,17 @@ def gradio_search(query):
100
  else:
101
  return '<p class="no-results">No results found. Please try a different query.</p>'
102
 
103
- # Dark-themed CSS
104
  custom_css = """
105
  body {
106
  font-family: Arial, sans-serif;
107
- background-color: #121212; /* Dark background */
108
- color: #E0E0E0; /* Light text color for dark background */
109
  }
110
  .container {
111
  max-width: 800px;
112
  margin: 0 auto;
113
  padding: 20px;
114
- color: #E0E0E0;
115
  }
116
  .results-container {
117
  display: flex;
@@ -119,9 +144,9 @@ body {
119
  justify-content: space-between;
120
  }
121
  .course-card {
122
- background-color: #1E1E1E; /* Darker card background */
123
  border-radius: 8px;
124
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
125
  margin-bottom: 20px;
126
  overflow: hidden;
127
  width: 48%;
@@ -141,10 +166,10 @@ body {
141
  .course-info h3 {
142
  margin-top: 0;
143
  font-size: 18px;
144
- color: #E0E0E0; /* Light text color */
145
  }
146
  .course-info p {
147
- color: #B0B0B0; /* Slightly darker text color for contrast */
148
  font-size: 14px;
149
  margin-bottom: 10px;
150
  }
@@ -163,7 +188,7 @@ body {
163
  }
164
  .no-results {
165
  text-align: center;
166
- color: #B0B0B0;
167
  font-style: italic;
168
  }
169
  """
@@ -177,10 +202,11 @@ iface = gr.Interface(
177
  description="Find the most relevant courses from Analytics Vidhya based on your query.",
178
  theme="huggingface",
179
  css=custom_css,
180
- examples=[["machine learning for beginners"],
181
- ["advanced data visualization techniques"],
182
- ["python programming basics"],
183
- ["Business Analytics"]
 
184
  ],
185
  )
186
 
 
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import gradio as gr
 
 
5
  import os
6
  from groq import Groq
7
  import creds # Assuming creds.py holds your API key as creds.api_key
 
37
  df = pd.DataFrame(courses)
38
 
39
  # Step 3: Initialize the Groq client and set the API key
40
+ client = Groq(api_key=creds.api_key) # Properly passing the API key
41
 
42
  def search_courses(query):
43
  try:
44
+ print(f"Searching for: {query}")
45
+ print(f"Number of courses in database: {len(df)}")
 
46
 
47
+ # Prepare the prompt for Groq
48
+ prompt = f"""Given the following query: "{query}"
49
+ Please analyze the query and rank the following courses based on their relevance to the query.
50
+ Prioritize courses from Analytics Vidhya. Provide a relevance score from 0 to 1 for each course.
51
+ Only return courses with a relevance score of 0.5 or higher.
52
+ Return the results in the following format:
53
+ Title: [Course Title]
54
+ Relevance: [Score]
55
+
56
+ Courses:
57
+ {df['title'].to_string(index=False)}
58
+ """
59
 
60
+ print("Sending request to Groq...")
61
+ # Get response from Groq
62
+ response = client.chat.completions.create(
63
+ model="mixtral-8x7b-32768", # Use the appropriate model
64
+ messages=[{"role": "system", "content": "You are an AI assistant specialized in course recommendations."},
65
+ {"role": "user", "content": prompt}],
66
+ temperature=0.2,
67
+ max_tokens=1000
68
+ )
69
+ print("Received response from Groq")
70
 
71
+ # Parse Groq's response
 
 
 
72
  results = []
73
+ print("Groq response content:")
74
+ print(response.choices[0].message.content)
 
 
 
 
 
 
 
 
75
 
76
+ for line in response.choices[0].message.content.split('\n'):
77
+ if line.startswith('Title:'):
78
+ title = line.split('Title:')[1].strip()
79
+ print(f"Found title: {title}")
80
+ elif line.startswith('Relevance:'):
81
+ relevance = float(line.split('Relevance:')[1].strip())
82
+ print(f"Relevance for {title}: {relevance}")
83
+ if relevance >= 0.5:
84
+ matching_courses = df[df['title'] == title]
85
+ if not matching_courses.empty:
86
+ course = matching_courses.iloc[0]
87
+ results.append({
88
+ 'title': title,
89
+ 'image_url': course['image_url'],
90
+ 'course_link': course['course_link'],
91
+ 'score': relevance
92
+ })
93
+ print(f"Added course: {title}")
94
+ else:
95
+ print(f"Warning: Course not found in database: {title}")
96
+
97
+ print(f"Number of results found: {len(results)}")
98
+ return sorted(results, key=lambda x: x['score'], reverse=True)[:10] # Return top 10 results
99
 
100
  except Exception as e:
101
+ print(f"An error occurred in search_courses: {str(e)}")
102
  return []
103
 
104
  def gradio_search(query):
 
110
  course_title = item['title']
111
  course_image = item['image_url']
112
  course_link = item['course_link']
113
+ relevance_score = round(item['score'] * 100, 2)
114
 
115
  html_output += f'''
116
  <div class="course-card">
 
126
  else:
127
  return '<p class="no-results">No results found. Please try a different query.</p>'
128
 
129
+ # Custom CSS for the Gradio interface
130
  custom_css = """
131
  body {
132
  font-family: Arial, sans-serif;
133
+ background-color: #000000; /* Set background to black */
134
+ color: #ffffff; /* Set text color to white for contrast */
135
  }
136
  .container {
137
  max-width: 800px;
138
  margin: 0 auto;
139
  padding: 20px;
 
140
  }
141
  .results-container {
142
  display: flex;
 
144
  justify-content: space-between;
145
  }
146
  .course-card {
147
+ background-color: white;
148
  border-radius: 8px;
149
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
150
  margin-bottom: 20px;
151
  overflow: hidden;
152
  width: 48%;
 
166
  .course-info h3 {
167
  margin-top: 0;
168
  font-size: 18px;
169
+ color: #333;
170
  }
171
  .course-info p {
172
+ color: #666;
173
  font-size: 14px;
174
  margin-bottom: 10px;
175
  }
 
188
  }
189
  .no-results {
190
  text-align: center;
191
+ color: #666;
192
  font-style: italic;
193
  }
194
  """
 
202
  description="Find the most relevant courses from Analytics Vidhya based on your query.",
203
  theme="huggingface",
204
  css=custom_css,
205
+ examples=[
206
+ ["machine learning for beginners"],
207
+ ["advanced data visualization techniques"],
208
+ ["python programming basics"],
209
+ ["Business Analytics"]
210
  ],
211
  )
212