# app.py (5.33 kB)
# Last commit: "Update app.py" by aryan79 (2bdf49a, verified)
import html
from urllib.parse import urljoin

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import creds # Assuming creds.py holds your API key as creds.api_key
# Step 1: Scrape the free courses from Analytics Vidhya
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
# A timeout keeps start-up from hanging indefinitely when the site is
# unreachable, and raise_for_status() prevents an HTTP error page from being
# parsed as if it were the course catalogue.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

courses = []
# Extracting course title, image, and course link
for course_card in soup.find_all('header', class_='course-card__img-container'):
    img_tag = course_card.find('img', class_='course-card__img')
    if not img_tag:
        continue

    # The course title is taken from the image's alt text.
    title = img_tag.get('alt')
    image_url = img_tag.get('src')

    # The link is read from the nearest <a> that precedes the card header.
    link_tag = course_card.find_previous('a')
    course_link = link_tag.get('href') if link_tag else None

    # A card without a title cannot be searched and one without a link
    # cannot be opened, so both are skipped instead of crashing later on None.
    if not title or not course_link:
        continue

    courses.append({
        'title': title,
        'image_url': image_url,
        # urljoin leaves absolute URLs untouched and resolves relative ones
        # against the catalogue page.
        'course_link': urljoin(url, course_link),
    })

# Fail with a clear message rather than a KeyError on a column-less frame.
if not courses:
    raise RuntimeError(
        f"No courses could be scraped from {url}; the page layout may have changed."
    )

# Step 2: Create DataFrame
df = pd.DataFrame(courses, columns=['title', 'image_url', 'course_link'])
# Step 3: Text Processing for Improved Relevance
def preprocess_text(text):
    """Return *text* lower-cased with every hyphen replaced by a space."""
    return text.lower().replace("-", " ")
# Normalise every title once so the index and the queries share the same form.
df['processed_title'] = df['title'].map(preprocess_text)

# Step 4: Generate TF-IDF Vectors for Titles
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['processed_title'])
def search_courses(query, min_score=0.3, max_results=10):
    """Return the courses whose titles best match *query*.

    The query is normalised with ``preprocess_text``, projected into the
    TF-IDF space fitted on the course titles, and compared with every title
    by cosine similarity.

    Args:
        query: Free-text search string. ``None`` or a blank string yields
            no results.
        min_score: Minimum cosine similarity a course needs to be returned.
        max_results: Maximum number of courses to return.

    Returns:
        A list of dicts with the keys ``title``, ``image_url``,
        ``course_link`` and ``score``, ordered by descending score.
    """
    # A blank query has no terms to match; skip vectorisation entirely.
    if not query or not query.strip():
        return []

    # Process query and generate its TF-IDF vector
    query_vector = vectorizer.transform([preprocess_text(query)])

    # Calculate cosine similarity
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Keep the scores in a local Series instead of writing a column onto the
    # shared module-level DataFrame, so calls do not overwrite each other's
    # scores.
    scores = pd.Series(similarities, index=df.index)

    # Filter, sort and truncate before building any result dicts.
    top_scores = (
        scores[scores >= min_score]
        .sort_values(ascending=False)
        .head(max_results)
    )
    matches = df.loc[top_scores.index]

    return [
        {
            'title': title,
            'image_url': image_url,
            'course_link': course_link,
            'score': float(score),
        }
        for title, image_url, course_link, score in zip(
            matches['title'],
            matches['image_url'],
            matches['course_link'],
            top_scores,
        )
    ]
def gradio_search(query):
    """Render the search results for *query* as an HTML fragment.

    Args:
        query: Free-text search string forwarded to ``search_courses``.

    Returns:
        A ``<div class="results-container">`` holding one ``course-card``
        per result, or a ``<p class="no-results">`` message when nothing
        matched.
    """
    result_list = search_courses(query)
    if not result_list:
        return '<p class="no-results">No results found. Please try a different query.</p>'

    cards = []
    for item in result_list:
        # Titles and URLs come from a scraped third-party page; escape them so
        # quotes, ampersands or angle brackets cannot break or inject markup.
        course_title = html.escape(str(item['title'] or ''))
        course_image = html.escape(str(item['image_url'] or ''))
        course_link = html.escape(str(item['course_link'] or ''))
        relevance_score = round(item['score'] * 100, 2)
        # rel="noopener noreferrer" stops the opened page from reaching back
        # into this one through window.opener.
        cards.append(f'''
        <div class="course-card">
            <img src="{course_image}" alt="{course_title}" class="course-image"/>
            <div class="course-info">
                <h3>{course_title}</h3>
                <p>Relevance: {relevance_score}%</p>
                <a href="{course_link}" target="_blank" rel="noopener noreferrer" class="course-link">View Course</a>
            </div>
        </div>''')

    # Join once instead of growing a string inside the loop.
    return '<div class="results-container">' + ''.join(cards) + '</div>'
# Dark-themed CSS handed to gr.Interface through its css= argument.
# The .results-container, .course-card, .course-image, .course-info,
# .course-link and .no-results selectors correspond to the class names
# emitted by gradio_search().
custom_css = """
body {
font-family: Arial, sans-serif;
background-color: #121212;
color: #E0E0E0;
}
.container {
max-width: 800px;
margin: 0 auto;
padding: 20px;
color: #E0E0E0;
}
.results-container {
display: flex;
flex-wrap: wrap;
justify-content: space-between;
}
.course-card {
background-color: #1E1E1E;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
margin-bottom: 20px;
overflow: hidden;
width: 48%;
transition: transform 0.2s;
}
.course-card:hover {
transform: translateY(-5px);
}
.course-image {
width: 100%;
height: 150px;
object-fit: cover;
}
.course-info {
padding: 15px;
}
.course-info h3 {
margin-top: 0;
font-size: 18px;
color: #E0E0E0;
}
.course-info p {
color: #B0B0B0;
font-size: 14px;
margin-bottom: 10px;
}
.course-link {
display: inline-block;
background-color: #007bff;
color: white;
padding: 8px 12px;
text-decoration: none;
border-radius: 4px;
font-size: 14px;
transition: background-color 0.2s;
}
.course-link:hover {
background-color: #0056b3;
}
.no-results {
text-align: center;
color: #B0B0B0;
font-style: italic;
}
"""
# Gradio interface: one text box in, one HTML panel out.
query_input = gr.Textbox(
    label="Enter your search query",
    placeholder="e.g., machine learning, data science, python",
)
results_output = gr.HTML(label="Search Results")

# Sample queries offered as clickable examples below the input.
example_queries = [
    ["machine learning for beginners"],
    ["advanced data visualization techniques"],
    ["python programming basics"],
    ["Business Analytics"],
]

iface = gr.Interface(
    fn=gradio_search,
    inputs=query_input,
    outputs=results_output,
    title="Analytics Vidhya Smart Course Search",
    description="Find the most relevant courses from Analytics Vidhya based on your query.",
    theme="huggingface",
    css=custom_css,
    examples=example_queries,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()