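# NOTE: the following lines appear to be page residue from the hosting site's
# file viewer rather than program code; kept here as comments.
# aryan79's picture
# Update app.py
# 3cc36a9 verified
# raw
# history blame
# 5.82 kB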
import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os
from groq import Groq
import creds # Assuming creds.py holds your API key as creds.api_key
# Step 1: Scrape the free courses from Analytics Vidhya
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
# A timeout keeps start-up from hanging indefinitely if the site never answers.
response = requests.get(url, timeout=30)
soup = BeautifulSoup(response.content, 'html.parser')

courses = []
# Each course card carries its title as the image alt text, its thumbnail as
# the image src, and its link on the nearest <a> tag preceding the card header.
for course_card in soup.find_all('header', class_='course-card__img-container'):
    img_tag = course_card.find('img', class_='course-card__img')
    if not img_tag:
        continue

    title = img_tag.get('alt')
    image_url = img_tag.get('src')
    if not title:
        # A missing title cannot be matched against a query, and a None entry
        # in the title column would make TF-IDF vectorization fail for every
        # search, so such cards are skipped.
        continue

    link_tag = course_card.find_previous('a')
    course_link = link_tag.get('href') if link_tag else None
    if course_link is None:
        # An <a> without an href gives no usable link (and would otherwise
        # raise AttributeError on .startswith below).
        continue

    # Relative links are resolved against the site root.
    if not course_link.startswith('http'):
        course_link = 'https://courses.analyticsvidhya.com' + course_link

    courses.append({
        'title': title,
        'image_url': image_url,
        'course_link': course_link,
    })

# Step 2: Create DataFrame
# Naming the columns explicitly guarantees they exist even when nothing was
# scraped, so later column lookups do not raise KeyError on an empty frame.
df = pd.DataFrame(courses, columns=['title', 'image_url', 'course_link'])

# Step 3: Initialize the Groq client and set the API key
# NOTE(review): `client` is not referenced anywhere else in the visible part of
# this file; it is kept because other code may rely on it.
client = Groq(api_key=creds.api_key)
def search_courses(query, *, top_n=10, min_relevance=0.5):
    """Return the scraped courses whose titles best match *query*.

    The course titles in the module-level ``df`` and the query are vectorized
    together with TF-IDF (English stop words removed), and the query is ranked
    against every title by cosine similarity. The model is refitted on each
    call with the query included in the corpus.

    Args:
        query: Free-text search string. Blank or non-string values yield no
            results.
        top_n: Maximum number of candidates considered, best first.
        min_relevance: Minimum cosine similarity (0..1) a course needs to be
            included.

    Returns:
        A list of dicts with keys ``title``, ``image_url``, ``course_link``
        and ``score`` (similarity as a percentage rounded to two decimals),
        ordered from most to least relevant. Empty when nothing matches or
        when the search fails; failures are logged rather than raised.
    """
    import logging  # local import so the block does not depend on file-level edits

    # A blank query contains no terms to match, so skip vectorization entirely.
    if not isinstance(query, str) or not query.strip():
        return []

    try:
        # Step 4: Preprocessing query and course titles for TF-IDF
        course_titles = df['title'].tolist()
        if not course_titles:
            return []

        # The query goes last so it can be sliced off as the final matrix row.
        tfidf_vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf_vectorizer.fit_transform(course_titles + [query])

        # Compute cosine similarity between the query and course titles
        cosine_similarities = cosine_similarity(
            tfidf_matrix[-1:], tfidf_matrix[:-1]
        ).flatten()

        # Indices of the best-matching titles, highest similarity first.
        top_indices = cosine_similarities.argsort()[::-1][:max(top_n, 0)]

        # Step 5: Build results
        results = []
        for index in top_indices:
            relevance = cosine_similarities[index]
            if relevance < min_relevance:
                # Candidates are in descending order, so none of the remaining
                # ones can reach the threshold either.
                break
            course = df.iloc[index]
            results.append({
                'title': course['title'],
                'image_url': course['image_url'],
                'course_link': course['course_link'],
                'score': round(relevance * 100, 2),  # Show relevance as percentage
            })
        return results
    except Exception:
        # Keep the UI responsive by returning "no results", but record the
        # failure instead of silently discarding it.
        logging.getLogger(__name__).exception(
            "Course search failed for query %r", query
        )
        return []
def gradio_search(query):
    """Run a course search and render the matches as an HTML fragment.

    Args:
        query: The search text entered by the user.

    Returns:
        An HTML string: a ``results-container`` div holding one
        ``course-card`` per match, or a ``no-results`` paragraph when
        ``search_courses`` returns nothing.
    """
    from html import escape  # local import so the block does not depend on file-level edits

    result_list = search_courses(query)
    if not result_list:
        return '<p class="no-results">No results found. Please try a different query.</p>'

    cards = []
    for item in result_list:
        # Titles and URLs come from a scraped third-party page; escape them so
        # quotes or angle brackets cannot break out of the attribute or inject
        # markup into the rendered output.
        course_title = escape(str(item['title']))
        course_image = escape(str(item['image_url']))
        course_link = escape(str(item['course_link']))
        relevance_score = item['score']
        cards.append(
            '\n<div class="course-card">\n'
            f'<img src="{course_image}" alt="{course_title}" class="course-image"/>\n'
            '<div class="course-info">\n'
            f'<h3>{course_title}</h3>\n'
            f'<p>Relevance: {relevance_score}%</p>\n'
            f'<a href="{course_link}" target="_blank" class="course-link">View Course</a>\n'
            '</div>\n'
            '</div>'
        )

    return '<div class="results-container">' + ''.join(cards) + '</div>'
# Dark-themed CSS for the search page, passed to `gr.Interface` through its
# `css` argument. The `.results-container`, `.course-card`, `.course-image`,
# `.course-info`, `.course-link` and `.no-results` rules style the class names
# used in the HTML built by `gradio_search`.
custom_css = """
body {
font-family: Arial, sans-serif;
background-color: #121212; /* Dark background */
color: #E0E0E0; /* Light text color for dark background */
}
.container {
max-width: 800px;
margin: 0 auto;
padding: 20px;
color: #E0E0E0;
}
.results-container {
display: flex;
flex-wrap: wrap;
justify-content: space-between;
}
.course-card {
background-color: #1E1E1E; /* Darker card background */
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
margin-bottom: 20px;
overflow: hidden;
width: 48%;
transition: transform 0.2s;
}
.course-card:hover {
transform: translateY(-5px);
}
.course-image {
width: 100%;
height: 150px;
object-fit: cover;
}
.course-info {
padding: 15px;
}
.course-info h3 {
margin-top: 0;
font-size: 18px;
color: #E0E0E0; /* Light text color */
}
.course-info p {
color: #B0B0B0; /* Slightly darker text color for contrast */
font-size: 14px;
margin-bottom: 10px;
}
.course-link {
display: inline-block;
background-color: #007bff;
color: white;
padding: 8px 12px;
text-decoration: none;
border-radius: 4px;
font-size: 14px;
transition: background-color 0.2s;
}
.course-link:hover {
background-color: #0056b3;
}
.no-results {
text-align: center;
color: #B0B0B0;
font-style: italic;
}
"""
# Gradio interface: one free-text query box in, one rendered HTML panel out.
_query_box = gr.Textbox(
    label="Enter your search query",
    placeholder="e.g., machine learning, data science, python",
)
_results_panel = gr.HTML(label="Search Results")

# Sample queries offered as one-click examples (one input value per row).
_example_queries = [
    ["machine learning for beginners"],
    ["advanced data visualization techniques"],
    ["python programming basics"],
    ["Business Analytics"],
]

iface = gr.Interface(
    fn=gradio_search,
    inputs=_query_box,
    outputs=_results_panel,
    examples=_example_queries,
    title="Analytics Vidhya Smart Course Search",
    description="Find the most relevant courses from Analytics Vidhya based on your query.",
    theme="huggingface",
    css=custom_css,
)

# Launch the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()