aryan79 committed on
Commit
6d78687
·
verified ·
1 Parent(s): f9b0ae0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -46
app.py CHANGED
@@ -4,67 +4,82 @@ import pandas as pd
4
  import gradio as gr
5
  import os
6
  from groq import Groq
7
- import creds # Ensure creds.py contains `api_key` as `creds.api_key`
8
 
9
- # Step 1: Scrape free courses from Analytics Vidhya
10
- def scrape_courses():
11
- url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
12
- response = requests.get(url)
13
- soup = BeautifulSoup(response.content, 'html.parser')
14
- courses = []
15
 
16
- for course_card in soup.find_all('header', class_='course-card__img-container'):
17
- img_tag = course_card.find('img', class_='course-card__img')
18
-
19
- if img_tag:
20
- title = img_tag.get('alt')
21
- image_url = img_tag.get('src')
22
-
23
- link_tag = course_card.find_previous('a')
24
- if link_tag:
25
- course_link = link_tag.get('href')
26
- if not course_link.startswith('http'):
27
- course_link = 'https://courses.analyticsvidhya.com' + course_link
28
 
29
- courses.append({
30
- 'title': title,
31
- 'image_url': image_url,
32
- 'course_link': course_link
33
- })
 
 
 
 
 
 
 
 
34
 
35
- return pd.DataFrame(courses)
 
 
 
 
36
 
37
- # Initialize course DataFrame
38
- df = scrape_courses()
39
 
40
- # Step 2: Initialize the Groq client and set the API key
41
- client = Groq(api_key=creds.api_key)
42
 
43
  def search_courses(query):
44
  try:
45
  print(f"Searching for: {query}")
46
- prompt = f"""Given the query: "{query}"
47
- Rank the following courses based on relevance to the query, prioritizing Analytics Vidhya courses.
48
- Provide a relevance score (0-1) for each course, returning only those with a score >= 0.5.
 
 
 
 
 
 
 
49
 
50
  Courses:
51
  {df['title'].to_string(index=False)}
52
  """
53
 
 
 
54
  response = client.chat.completions.create(
55
- model="mixtral-8x7b-32768",
56
  messages=[{"role": "system", "content": "You are an AI assistant specialized in course recommendations."},
57
  {"role": "user", "content": prompt}],
58
  temperature=0.2,
59
  max_tokens=1000
60
  )
 
61
 
 
62
  results = []
 
 
 
63
  for line in response.choices[0].message.content.split('\n'):
64
  if line.startswith('Title:'):
65
  title = line.split('Title:')[1].strip()
 
66
  elif line.startswith('Relevance:'):
67
  relevance = float(line.split('Relevance:')[1].strip())
 
68
  if relevance >= 0.5:
69
  matching_courses = df[df['title'] == title]
70
  if not matching_courses.empty:
@@ -75,7 +90,11 @@ def search_courses(query):
75
  'course_link': course['course_link'],
76
  'score': relevance
77
  })
 
 
 
78
 
 
79
  return sorted(results, key=lambda x: x['score'], reverse=True)[:10] # Return top 10 results
80
 
81
  except Exception as e:
@@ -109,19 +128,71 @@ def gradio_search(query):
109
 
110
  # Custom CSS for the Gradio interface
111
  custom_css = """
112
- body { font-family: Arial, sans-serif; background-color: #f0f2f5; }
113
- h1, h2, p, .container .examples { color: #333; }
114
- .container { max-width: 800px; margin: 0 auto; padding: 20px; }
115
- .results-container { display: flex; flex-wrap: wrap; justify-content: space-between; }
116
- .course-card { background-color: white; border-radius: 8px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); margin-bottom: 20px; overflow: hidden; width: 48%; transition: transform 0.2s; }
117
- .course-card:hover { transform: translateY(-5px); }
118
- .course-image { width: 100%; height: 150px; object-fit: cover; }
119
- .course-info { padding: 15px; }
120
- .course-info h3 { margin-top: 0; font-size: 18px; color: #333; }
121
- .course-info p { color: #666; font-size: 14px; margin-bottom: 10px; }
122
- .course-link { display: inline-block; background-color: #007bff; color: white; padding: 8px 12px; text-decoration: none; border-radius: 4px; font-size: 14px; transition: background-color 0.2s; }
123
- .course-link:hover { background-color: #0056b3; }
124
- .no-results { text-align: center; color: #666; font-style: italic; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  """
126
 
127
  # Gradio interface
 
4
  import gradio as gr
5
  import os
6
  from groq import Groq
7
+ import creds # Assuming creds.py holds your API key as creds.api_key
8
 
9
+ # Step 1: Scrape the free courses from Analytics Vidhya
10
+ url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
11
+ response = requests.get(url)
12
+ soup = BeautifulSoup(response.content, 'html.parser')
 
 
13
 
14
+ courses = []
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # Extracting course title, image, and course link
17
+ for course_card in soup.find_all('header', class_='course-card__img-container'):
18
+ img_tag = course_card.find('img', class_='course-card__img')
19
+
20
+ if img_tag:
21
+ title = img_tag.get('alt')
22
+ image_url = img_tag.get('src')
23
+
24
+ link_tag = course_card.find_previous('a')
25
+ if link_tag:
26
+ course_link = link_tag.get('href')
27
+ if not course_link.startswith('http'):
28
+ course_link = 'https://courses.analyticsvidhya.com' + course_link
29
 
30
+ courses.append({
31
+ 'title': title,
32
+ 'image_url': image_url,
33
+ 'course_link': course_link
34
+ })
35
 
36
+ # Step 2: Create DataFrame
37
+ df = pd.DataFrame(courses)
38
 
39
+ # Step 3: Initialize the Groq client and set the API key
40
+ client = Groq(api_key=creds.api_key) # Properly passing the API key
41
 
42
  def search_courses(query):
43
  try:
44
  print(f"Searching for: {query}")
45
+ print(f"Number of courses in database: {len(df)}")
46
+
47
+ # Prepare the prompt for Groq
48
+ prompt = f"""Given the following query: "{query}"
49
+ Please analyze the query and rank the following courses based on their relevance to the query.
50
+ Prioritize courses from Analytics Vidhya. Provide a relevance score from 0 to 1 for each course.
51
+ Only return courses with a relevance score of 0.5 or higher.
52
+ Return the results in the following format:
53
+ Title: [Course Title]
54
+ Relevance: [Score]
55
 
56
  Courses:
57
  {df['title'].to_string(index=False)}
58
  """
59
 
60
+ print("Sending request to Groq...")
61
+ # Get response from Groq
62
  response = client.chat.completions.create(
63
+ model="mixtral-8x7b-32768", # Use the appropriate model
64
  messages=[{"role": "system", "content": "You are an AI assistant specialized in course recommendations."},
65
  {"role": "user", "content": prompt}],
66
  temperature=0.2,
67
  max_tokens=1000
68
  )
69
+ print("Received response from Groq")
70
 
71
+ # Parse Groq's response
72
  results = []
73
+ print("Groq response content:")
74
+ print(response.choices[0].message.content)
75
+
76
  for line in response.choices[0].message.content.split('\n'):
77
  if line.startswith('Title:'):
78
  title = line.split('Title:')[1].strip()
79
+ print(f"Found title: {title}")
80
  elif line.startswith('Relevance:'):
81
  relevance = float(line.split('Relevance:')[1].strip())
82
+ print(f"Relevance for {title}: {relevance}")
83
  if relevance >= 0.5:
84
  matching_courses = df[df['title'] == title]
85
  if not matching_courses.empty:
 
90
  'course_link': course['course_link'],
91
  'score': relevance
92
  })
93
+ print(f"Added course: {title}")
94
+ else:
95
+ print(f"Warning: Course not found in database: {title}")
96
 
97
+ print(f"Number of results found: {len(results)}")
98
  return sorted(results, key=lambda x: x['score'], reverse=True)[:10] # Return top 10 results
99
 
100
  except Exception as e:
 
128
 
129
  # Custom CSS for the Gradio interface
130
  custom_css = """
131
+ body {
132
+ font-family: Arial, sans-serif;
133
+ background-color: #f0f2f5;
134
+ }
135
+ h1, h2, p, .container .examples {
136
+ color: #333; /* Darker color for better visibility */
137
+ }
138
+ .container {
139
+ max-width: 800px;
140
+ margin: 0 auto;
141
+ padding: 20px;
142
+ }
143
+ .results-container {
144
+ display: flex;
145
+ flex-wrap: wrap;
146
+ justify-content: space-between;
147
+ }
148
+ .course-card {
149
+ background-color: white;
150
+ border-radius: 8px;
151
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
152
+ margin-bottom: 20px;
153
+ overflow: hidden;
154
+ width: 48%;
155
+ transition: transform 0.2s;
156
+ }
157
+ .course-card:hover {
158
+ transform: translateY(-5px);
159
+ }
160
+ .course-image {
161
+ width: 100%;
162
+ height: 150px;
163
+ object-fit: cover;
164
+ }
165
+ .course-info {
166
+ padding: 15px;
167
+ }
168
+ .course-info h3 {
169
+ margin-top: 0;
170
+ font-size: 18px;
171
+ color: #333;
172
+ }
173
+ .course-info p {
174
+ color: #666;
175
+ font-size: 14px;
176
+ margin-bottom: 10px;
177
+ }
178
+ .course-link {
179
+ display: inline-block;
180
+ background-color: #007bff;
181
+ color: white;
182
+ padding: 8px 12px;
183
+ text-decoration: none;
184
+ border-radius: 4px;
185
+ font-size: 14px;
186
+ transition: background-color 0.2s;
187
+ }
188
+ .course-link:hover {
189
+ background-color: #0056b3;
190
+ }
191
+ .no-results {
192
+ text-align: center;
193
+ color: #666;
194
+ font-style: italic;
195
+ }
196
  """
197
 
198
  # Gradio interface