aryan79 committed on
Commit
e8af220
·
verified ·
1 Parent(s): 0fa927e

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +213 -0
  2. creds.py +5 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import pandas as pd
4
+ import gradio as gr
5
+ import os
6
+ from groq import Groq
7
+ import creds # Assuming creds.py holds your API key as creds.api_key
8
+
9
# Step 1: Scrape the free courses from Analytics Vidhya
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
# A timeout keeps start-up from hanging indefinitely if the site does not answer.
response = requests.get(url, timeout=30)
if not response.ok:
    # Keep going (the catalogue will most likely be empty) but make the cause visible.
    print(f"Warning: course page request returned HTTP {response.status_code}")
soup = BeautifulSoup(response.content, 'html.parser')

courses = []

# Extracting course title, image, and course link
for course_card in soup.find_all('header', class_='course-card__img-container'):
    img_tag = course_card.find('img', class_='course-card__img')
    if not img_tag:
        continue

    # The course title is taken from the image's alt text.
    title = img_tag.get('alt')
    image_url = img_tag.get('src')

    # The link is the nearest <a> that precedes the card header in the document.
    link_tag = course_card.find_previous('a')
    course_link = link_tag.get('href') if link_tag else None

    # Skip cards that cannot be shown as a result: no title to rank, or no
    # link to open. This also avoids calling .startswith() on a missing href.
    if not title or not course_link:
        continue

    # Turn site-relative links into absolute URLs.
    if not course_link.startswith('http'):
        course_link = 'https://courses.analyticsvidhya.com' + course_link

    courses.append({
        'title': title,
        'image_url': image_url,
        'course_link': course_link,
    })

# Step 2: Create DataFrame
# Naming the columns explicitly keeps df['title'] valid even when nothing was scraped.
df = pd.DataFrame(courses, columns=['title', 'image_url', 'course_link'])

# Step 3: Initialize the Groq client and set the API key
client = Groq(api_key=creds.api_key)  # Properly passing the API key
41
+
42
def _normalize_title(value):
    """Return *value* as a case-folded string with whitespace runs collapsed."""
    return ' '.join(str(value).split()).casefold()


def _parse_ranked_titles(text):
    """Extract ``(title, score)`` pairs from the model's reply.

    The reply is expected to contain ``Title: ...`` lines, each followed by a
    ``Relevance: ...`` line. Leading bullets or numbering, Markdown emphasis
    and square brackets around the values are tolerated. A relevance line
    with no preceding title, or with a score that is not a number, is skipped
    instead of aborting the whole parse.
    """
    ranked = []
    pending_title = None
    for raw_line in (text or '').splitlines():
        label, colon, value = raw_line.partition(':')
        if not colon:
            continue

        # "1. **Title" -> ["1.", "Title"]; only the last word identifies the label.
        label_words = label.replace('*', ' ').split()
        value = value.strip().strip('*').strip()
        if value.startswith('[') and value.endswith(']'):
            value = value[1:-1].strip()
        if not label_words or not value:
            continue

        label_key = label_words[-1].casefold()
        if label_key == 'title':
            pending_title = value
        elif label_key == 'relevance' and pending_title is not None:
            try:
                score = float(value.split()[0].rstrip('.,;'))
            except ValueError:
                print(f"Warning: could not parse relevance for {pending_title}: {value}")
            else:
                ranked.append((pending_title, score))
            # Each title is paired with at most one relevance line.
            pending_title = None
    return ranked


def search_courses(query):
    """Rank the scraped courses against *query* using the Groq chat model.

    Args:
        query: Free-text search string entered by the user.

    Returns:
        A list of at most 10 dicts with keys ``title``, ``image_url``,
        ``course_link`` and ``score``, sorted by descending score. Only
        courses the model scored 0.5 or higher, and whose title matches a row
        of the module-level ``df``, are included. Returns ``[]`` for an empty
        query, an empty catalogue, or when any error occurs.
    """
    if not query or not str(query).strip():
        print("Empty query; skipping search")
        return []

    try:
        print(f"Searching for: {query}")
        print(f"Number of courses in database: {len(df)}")

        if df.empty or 'title' not in df.columns:
            print("No courses available to search")
            return []

        # One title per line, unpadded, so the model can echo titles verbatim.
        course_titles = '\n'.join(str(t) for t in df['title'])

        # Prepare the prompt for Groq
        prompt = (
            f'Given the following query: "{query}"\n'
            'Please analyze the query and rank the following courses based on their relevance to the query.\n'
            'Prioritize courses from Analytics Vidhya. Provide a relevance score from 0 to 1 for each course.\n'
            'Only return courses with a relevance score of 0.5 or higher.\n'
            'Return the results in the following format:\n'
            'Title: [Course Title]\n'
            'Relevance: [Score]\n'
            '\n'
            'Courses:\n'
            f'{course_titles}\n'
        )

        print("Sending request to Groq...")
        # Get response from Groq
        # NOTE(review): confirm this model id is still offered by Groq.
        response = client.chat.completions.create(
            model="mixtral-8x7b-32768",  # Use the appropriate model
            messages=[
                {"role": "system", "content": "You are an AI assistant specialized in course recommendations."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
            max_tokens=1000,
        )
        print("Received response from Groq")

        # The message content may be missing; treat that as an empty reply.
        reply = response.choices[0].message.content or ''
        print("Groq response content:")
        print(reply)

        # Index the catalogue by normalized title; the first row wins on duplicates.
        courses_by_title = {}
        for _, row in df.iterrows():
            courses_by_title.setdefault(_normalize_title(row['title']), row)

        # Parse Groq's response
        results = []
        seen = set()
        for title, relevance in _parse_ranked_titles(reply):
            print(f"Found title: {title}")
            print(f"Relevance for {title}: {relevance}")
            # Written as "not >=" so that a NaN score is rejected as well.
            if not relevance >= 0.5:
                continue

            key = _normalize_title(title)
            course = courses_by_title.get(key)
            if course is None:
                print(f"Warning: Course not found in database: {title}")
                continue
            if key in seen:
                # The model listed the same course twice; keep the first entry.
                continue
            seen.add(key)

            results.append({
                'title': course['title'],
                'image_url': course['image_url'],
                'course_link': course['course_link'],
                'score': relevance,
            })
            print(f"Added course: {course['title']}")

        print(f"Number of results found: {len(results)}")
        return sorted(results, key=lambda x: x['score'], reverse=True)[:10]  # Return top 10 results

    except Exception as e:
        # This function backs a UI callback: report the failure and show
        # "no results" rather than surfacing a traceback to the user.
        print(f"An error occurred in search_courses: {str(e)}")
        return []
103
+
104
def gradio_search(query):
    """Run a course search and render the matches as an HTML fragment.

    Args:
        query: Free-text search string from the Gradio textbox.

    Returns:
        An HTML string: a ``results-container`` div holding one
        ``course-card`` per result, or a ``no-results`` paragraph when
        ``search_courses`` returns nothing.
    """
    # Imported locally so this function does not depend on the file's import block.
    from html import escape

    result_list = search_courses(query)
    if not result_list:
        return '<p class="no-results">No results found. Please try a different query.</p>'

    cards = []
    for item in result_list:
        # Titles and URLs come from scraped pages and model output, so they
        # are escaped before being placed in markup or attribute values.
        course_title = escape(str(item['title']))
        course_image = escape(str(item['image_url'] or ''))
        course_link = escape(str(item['course_link'] or ''))
        relevance_score = round(item['score'] * 100, 2)

        cards.append(f'''
        <div class="course-card">
            <img src="{course_image}" alt="{course_title}" class="course-image"/>
            <div class="course-info">
                <h3>{course_title}</h3>
                <p>Relevance: {relevance_score}%</p>
                <a href="{course_link}" target="_blank" class="course-link">View Course</a>
            </div>
        </div>''')

    return '<div class="results-container">' + ''.join(cards) + '</div>'
128
+
129
# Stylesheet handed to gr.Interface through its `css` argument. It styles the
# class names emitted by gradio_search (results-container, course-card,
# course-image, course-info, course-link, no-results).
custom_css = """
body {
    font-family: Arial, sans-serif;
    background-color: #f0f2f5;
}
.container {
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
}
.results-container {
    display: flex;
    flex-wrap: wrap;
    justify-content: space-between;
}
.course-card {
    background-color: white;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    margin-bottom: 20px;
    overflow: hidden;
    width: 48%;
    transition: transform 0.2s;
}
.course-card:hover {
    transform: translateY(-5px);
}
.course-image {
    width: 100%;
    height: 150px;
    object-fit: cover;
}
.course-info {
    padding: 15px;
}
.course-info h3 {
    margin-top: 0;
    font-size: 18px;
    color: #333;
}
.course-info p {
    color: #666;
    font-size: 14px;
    margin-bottom: 10px;
}
.course-link {
    display: inline-block;
    background-color: #007bff;
    color: white;
    padding: 8px 12px;
    text-decoration: none;
    border-radius: 4px;
    font-size: 14px;
    transition: background-color 0.2s;
}
.course-link:hover {
    background-color: #0056b3;
}
.no-results {
    text-align: center;
    color: #666;
    font-style: italic;
}
"""

# Search UI: one text input feeding gradio_search, one HTML output panel.
query_box = gr.Textbox(
    label="Enter your search query",
    placeholder="e.g., machine learning, data science, python",
)
results_panel = gr.HTML(label="Search Results")

# Sample queries offered by the interface.
example_queries = [
    ["machine learning for beginners"],
    ["advanced data visualization techniques"],
    ["python programming basics"],
    ["Business Analytics"],
]

iface = gr.Interface(
    fn=gradio_search,
    inputs=query_box,
    outputs=results_panel,
    title="Analytics Vidhya Smart Course Search",
    description="Find the most relevant courses from Analytics Vidhya based on your query.",
    theme="huggingface",
    css=custom_css,
    examples=example_queries,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
creds.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# creds.py

# Supplies the Groq API key as `creds.api_key`.
#
# The key is read from the GROQ_API_KEY environment variable (on a Hugging
# Face Space, configure it as a secret) so that it is never stored in the
# repository. `api_key` is None when the variable is not set.
#
# SECURITY: a literal key was previously committed on this line. Treat that
# key as leaked and revoke/rotate it.
import os

api_key = os.environ.get("GROQ_API_KEY")
5
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ requests
2
+ beautifulsoup4
3
+ pandas
4
+ gradio
5
+ groq