|
import requests |
|
from bs4 import BeautifulSoup |
|
import pandas as pd |
|
import gradio as gr |
|
import os |
|
from groq import Groq |
|
import creds |
|
|
|
|
|
def scrape_courses(): |
|
url = "https://courses.analyticsvidhya.com/pages/all-free-courses" |
|
response = requests.get(url) |
|
soup = BeautifulSoup(response.content, 'html.parser') |
|
courses = [] |
|
|
|
for course_card in soup.find_all('header', class_='course-card__img-container'): |
|
img_tag = course_card.find('img', class_='course-card__img') |
|
|
|
if img_tag: |
|
title = img_tag.get('alt') |
|
image_url = img_tag.get('src') |
|
|
|
link_tag = course_card.find_previous('a') |
|
if link_tag: |
|
course_link = link_tag.get('href') |
|
if not course_link.startswith('http'): |
|
course_link = 'https://courses.analyticsvidhya.com' + course_link |
|
|
|
courses.append({ |
|
'title': title, |
|
'image_url': image_url, |
|
'course_link': course_link |
|
}) |
|
|
|
return pd.DataFrame(courses) |
|
|
|
|
|
df = scrape_courses() |
|
|
|
|
|
client = Groq(api_key=creds.api_key) |
|
|
|
def search_courses(query): |
|
try: |
|
print(f"Searching for: {query}") |
|
prompt = f"""Given the query: "{query}" |
|
Rank the following courses based on relevance to the query, prioritizing Analytics Vidhya courses. |
|
Provide a relevance score (0-1) for each course, returning only those with a score >= 0.5. |
|
|
|
Courses: |
|
{df['title'].to_string(index=False)} |
|
""" |
|
|
|
response = client.chat.completions.create( |
|
model="mixtral-8x7b-32768", |
|
messages=[{"role": "system", "content": "You are an AI assistant specialized in course recommendations."}, |
|
{"role": "user", "content": prompt}], |
|
temperature=0.2, |
|
max_tokens=1000 |
|
) |
|
|
|
results = [] |
|
for line in response.choices[0].message.content.split('\n'): |
|
if line.startswith('Title:'): |
|
title = line.split('Title:')[1].strip() |
|
elif line.startswith('Relevance:'): |
|
relevance = float(line.split('Relevance:')[1].strip()) |
|
if relevance >= 0.5: |
|
matching_courses = df[df['title'] == title] |
|
if not matching_courses.empty: |
|
course = matching_courses.iloc[0] |
|
results.append({ |
|
'title': title, |
|
'image_url': course['image_url'], |
|
'course_link': course['course_link'], |
|
'score': relevance |
|
}) |
|
|
|
return sorted(results, key=lambda x: x['score'], reverse=True)[:10] |
|
|
|
except Exception as e: |
|
print(f"An error occurred in search_courses: {str(e)}") |
|
return [] |
|
|
|
def gradio_search(query): |
|
result_list = search_courses(query) |
|
|
|
if result_list: |
|
html_output = '<div class="results-container">' |
|
for item in result_list: |
|
course_title = item['title'] |
|
course_image = item['image_url'] |
|
course_link = item['course_link'] |
|
relevance_score = round(item['score'] * 100, 2) |
|
|
|
html_output += f''' |
|
<div class="course-card"> |
|
<img src="{course_image}" alt="{course_title}" class="course-image"/> |
|
<div class="course-info"> |
|
<h3>{course_title}</h3> |
|
<p>Relevance: {relevance_score}%</p> |
|
<a href="{course_link}" target="_blank" class="course-link">View Course</a> |
|
</div> |
|
</div>''' |
|
html_output += '</div>' |
|
return html_output |
|
else: |
|
return '<p class="no-results">No results found. Please try a different query.</p>' |
|
|
|
|
|
custom_css = """ |
|
body { font-family: Arial, sans-serif; background-color: #f0f2f5; } |
|
h1, h2, p, .container .examples { color: #333; } |
|
.container { max-width: 800px; margin: 0 auto; padding: 20px; } |
|
.results-container { display: flex; flex-wrap: wrap; justify-content: space-between; } |
|
.course-card { background-color: white; border-radius: 8px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); margin-bottom: 20px; overflow: hidden; width: 48%; transition: transform 0.2s; } |
|
.course-card:hover { transform: translateY(-5px); } |
|
.course-image { width: 100%; height: 150px; object-fit: cover; } |
|
.course-info { padding: 15px; } |
|
.course-info h3 { margin-top: 0; font-size: 18px; color: #333; } |
|
.course-info p { color: #666; font-size: 14px; margin-bottom: 10px; } |
|
.course-link { display: inline-block; background-color: #007bff; color: white; padding: 8px 12px; text-decoration: none; border-radius: 4px; font-size: 14px; transition: background-color 0.2s; } |
|
.course-link:hover { background-color: #0056b3; } |
|
.no-results { text-align: center; color: #666; font-style: italic; } |
|
""" |
|
|
|
|
|
iface = gr.Interface( |
|
fn=gradio_search, |
|
inputs=gr.Textbox(label="Enter your search query", placeholder="e.g., machine learning, data science, python"), |
|
outputs=gr.HTML(label="Search Results"), |
|
title="Analytics Vidhya Smart Course Search", |
|
description="Find the most relevant courses from Analytics Vidhya based on your query.", |
|
theme="huggingface", |
|
css=custom_css, |
|
examples=[ |
|
["machine learning for beginners"], |
|
["advanced data visualization techniques"], |
|
["python programming basics"], |
|
["Business Analytics"] |
|
], |
|
) |
|
|
|
if __name__ == "__main__": |
|
iface.launch() |