aryan79 committed on
Commit
f9b0ae0
·
verified ·
1 Parent(s): 161faeb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -117
app.py CHANGED
@@ -4,82 +4,67 @@ import pandas as pd
4
  import gradio as gr
5
  import os
6
  from groq import Groq
7
- import creds # Assuming creds.py holds your API key as creds.api_key
8
 
9
- # Step 1: Scrape the free courses from Analytics Vidhya
10
- url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
11
- response = requests.get(url)
12
- soup = BeautifulSoup(response.content, 'html.parser')
 
 
13
 
14
- courses = []
15
-
16
- # Extracting course title, image, and course link
17
- for course_card in soup.find_all('header', class_='course-card__img-container'):
18
- img_tag = course_card.find('img', class_='course-card__img')
19
-
20
- if img_tag:
21
- title = img_tag.get('alt')
22
- image_url = img_tag.get('src')
23
 
24
- link_tag = course_card.find_previous('a')
25
- if link_tag:
26
- course_link = link_tag.get('href')
27
- if not course_link.startswith('http'):
28
- course_link = 'https://courses.analyticsvidhya.com' + course_link
 
 
 
 
29
 
30
- courses.append({
31
- 'title': title,
32
- 'image_url': image_url,
33
- 'course_link': course_link
34
- })
35
 
36
- # Step 2: Create DataFrame
37
- df = pd.DataFrame(courses)
38
 
39
- # Step 3: Initialize the Groq client and set the API key
40
- client = Groq(api_key=creds.api_key) # Properly passing the API key
 
 
 
41
 
42
  def search_courses(query):
43
  try:
44
  print(f"Searching for: {query}")
45
- print(f"Number of courses in database: {len(df)}")
46
-
47
- # Prepare the prompt for Groq
48
- prompt = f"""Given the following query: "{query}"
49
- Please analyze the query and rank the following courses based on their relevance to the query.
50
- Prioritize courses from Analytics Vidhya. Provide a relevance score from 0 to 1 for each course.
51
- Only return courses with a relevance score of 0.5 or higher.
52
- Return the results in the following format:
53
- Title: [Course Title]
54
- Relevance: [Score]
55
 
56
  Courses:
57
  {df['title'].to_string(index=False)}
58
  """
59
 
60
- print("Sending request to Groq...")
61
- # Get response from Groq
62
  response = client.chat.completions.create(
63
- model="mixtral-8x7b-32768", # Use the appropriate model
64
  messages=[{"role": "system", "content": "You are an AI assistant specialized in course recommendations."},
65
  {"role": "user", "content": prompt}],
66
  temperature=0.2,
67
  max_tokens=1000
68
  )
69
- print("Received response from Groq")
70
 
71
- # Parse Groq's response
72
  results = []
73
- print("Groq response content:")
74
- print(response.choices[0].message.content)
75
-
76
  for line in response.choices[0].message.content.split('\n'):
77
  if line.startswith('Title:'):
78
  title = line.split('Title:')[1].strip()
79
- print(f"Found title: {title}")
80
  elif line.startswith('Relevance:'):
81
  relevance = float(line.split('Relevance:')[1].strip())
82
- print(f"Relevance for {title}: {relevance}")
83
  if relevance >= 0.5:
84
  matching_courses = df[df['title'] == title]
85
  if not matching_courses.empty:
@@ -90,11 +75,7 @@ def search_courses(query):
90
  'course_link': course['course_link'],
91
  'score': relevance
92
  })
93
- print(f"Added course: {title}")
94
- else:
95
- print(f"Warning: Course not found in database: {title}")
96
 
97
- print(f"Number of results found: {len(results)}")
98
  return sorted(results, key=lambda x: x['score'], reverse=True)[:10] # Return top 10 results
99
 
100
  except Exception as e:
@@ -128,71 +109,19 @@ def gradio_search(query):
128
 
129
  # Custom CSS for the Gradio interface
130
  custom_css = """
131
- body {
132
- font-family: Arial, sans-serif;
133
- background-color: #f0f2f5;
134
- }
135
- h1, h2, p, .container .examples {
136
- color: #333; /* Darker color for better visibility */
137
- }
138
- .container {
139
- max-width: 800px;
140
- margin: 0 auto;
141
- padding: 20px;
142
- }
143
- .results-container {
144
- display: flex;
145
- flex-wrap: wrap;
146
- justify-content: space-between;
147
- }
148
- .course-card {
149
- background-color: white;
150
- border-radius: 8px;
151
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
152
- margin-bottom: 20px;
153
- overflow: hidden;
154
- width: 48%;
155
- transition: transform 0.2s;
156
- }
157
- .course-card:hover {
158
- transform: translateY(-5px);
159
- }
160
- .course-image {
161
- width: 100%;
162
- height: 150px;
163
- object-fit: cover;
164
- }
165
- .course-info {
166
- padding: 15px;
167
- }
168
- .course-info h3 {
169
- margin-top: 0;
170
- font-size: 18px;
171
- color: #333;
172
- }
173
- .course-info p {
174
- color: #666;
175
- font-size: 14px;
176
- margin-bottom: 10px;
177
- }
178
- .course-link {
179
- display: inline-block;
180
- background-color: #007bff;
181
- color: white;
182
- padding: 8px 12px;
183
- text-decoration: none;
184
- border-radius: 4px;
185
- font-size: 14px;
186
- transition: background-color 0.2s;
187
- }
188
- .course-link:hover {
189
- background-color: #0056b3;
190
- }
191
- .no-results {
192
- text-align: center;
193
- color: #666;
194
- font-style: italic;
195
- }
196
  """
197
 
198
  # Gradio interface
 
4
  import gradio as gr
5
  import os
6
  from groq import Groq
7
+ import creds # Ensure creds.py contains `api_key` as `creds.api_key`
8
 
9
+ # Step 1: Scrape free courses from Analytics Vidhya
10
def scrape_courses():
    """Scrape the Analytics Vidhya free-course listing into a DataFrame.

    Fetches the "all free courses" page, visits every
    ``<header class="course-card__img-container">`` element and records:

    * ``title``       - the ``alt`` text of the card image,
    * ``image_url``   - the ``src`` of the card image,
    * ``course_link`` - the ``href`` of the nearest preceding ``<a>`` tag,
      resolved against the site root when it is relative.

    Cards without an image, without a preceding anchor, or whose anchor has
    no ``href`` are skipped.

    Returns:
        pandas.DataFrame: one row per course with the columns ``title``,
        ``image_url`` and ``course_link``. The columns exist even when no
        course was found, so ``df['title']`` stays valid on an empty scrape.

    Raises:
        requests.RequestException: if the page cannot be fetched, including
            when the request times out.
    """
    # Local import so the block does not depend on a new file-level import.
    from urllib.parse import urljoin

    base_url = "https://courses.analyticsvidhya.com"
    url = base_url + "/pages/all-free-courses"

    # A timeout keeps a stalled connection from hanging start-up forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    courses = []

    for course_card in soup.find_all('header', class_='course-card__img-container'):
        img_tag = course_card.find('img', class_='course-card__img')
        if not img_tag:
            continue

        link_tag = course_card.find_previous('a')
        # An <a> without href yields None; guard before any string handling.
        href = link_tag.get('href') if link_tag else None
        if not href:
            continue

        courses.append({
            'title': img_tag.get('alt'),
            'image_url': img_tag.get('src'),
            # urljoin leaves absolute URLs alone and joins relative ones
            # correctly whether or not they start with a slash.
            'course_link': urljoin(base_url + '/', href),
        })

    # Explicit columns keep the schema stable when nothing was scraped.
    return pd.DataFrame(courses, columns=['title', 'image_url', 'course_link'])
 
36
 
37
+ # Initialize course DataFrame
38
+ df = scrape_courses()
39
+
40
+ # Step 2: Initialize the Groq client and set the API key
41
+ client = Groq(api_key=creds.api_key)
42
 
43
  def search_courses(query):
44
  try:
45
  print(f"Searching for: {query}")
46
+ prompt = f"""Given the query: "{query}"
47
+ Rank the following courses based on relevance to the query, prioritizing Analytics Vidhya courses.
48
+ Provide a relevance score (0-1) for each course, returning only those with a score >= 0.5.
 
 
 
 
 
 
 
49
 
50
  Courses:
51
  {df['title'].to_string(index=False)}
52
  """
53
 
 
 
54
  response = client.chat.completions.create(
55
+ model="mixtral-8x7b-32768",
56
  messages=[{"role": "system", "content": "You are an AI assistant specialized in course recommendations."},
57
  {"role": "user", "content": prompt}],
58
  temperature=0.2,
59
  max_tokens=1000
60
  )
 
61
 
 
62
  results = []
 
 
 
63
  for line in response.choices[0].message.content.split('\n'):
64
  if line.startswith('Title:'):
65
  title = line.split('Title:')[1].strip()
 
66
  elif line.startswith('Relevance:'):
67
  relevance = float(line.split('Relevance:')[1].strip())
 
68
  if relevance >= 0.5:
69
  matching_courses = df[df['title'] == title]
70
  if not matching_courses.empty:
 
75
  'course_link': course['course_link'],
76
  'score': relevance
77
  })
 
 
 
78
 
 
79
  return sorted(results, key=lambda x: x['score'], reverse=True)[:10] # Return top 10 results
80
 
81
  except Exception as e:
 
109
 
110
  # Custom CSS for the Gradio interface
111
  custom_css = """
112
+ body { font-family: Arial, sans-serif; background-color: #f0f2f5; }
113
+ h1, h2, p, .container .examples { color: #333; }
114
+ .container { max-width: 800px; margin: 0 auto; padding: 20px; }
115
+ .results-container { display: flex; flex-wrap: wrap; justify-content: space-between; }
116
+ .course-card { background-color: white; border-radius: 8px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); margin-bottom: 20px; overflow: hidden; width: 48%; transition: transform 0.2s; }
117
+ .course-card:hover { transform: translateY(-5px); }
118
+ .course-image { width: 100%; height: 150px; object-fit: cover; }
119
+ .course-info { padding: 15px; }
120
+ .course-info h3 { margin-top: 0; font-size: 18px; color: #333; }
121
+ .course-info p { color: #666; font-size: 14px; margin-bottom: 10px; }
122
+ .course-link { display: inline-block; background-color: #007bff; color: white; padding: 8px 12px; text-decoration: none; border-radius: 4px; font-size: 14px; transition: background-color 0.2s; }
123
+ .course-link:hover { background-color: #0056b3; }
124
+ .no-results { text-align: center; color: #666; font-style: italic; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  """
126
 
127
  # Gradio interface