|
import requests |
|
from bs4 import BeautifulSoup |
|
import pandas as pd |
|
import gradio as gr |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import creds |
|
|
|
|
|
# Catalogue page that lists every free course; it is scraped once, at import time.
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"

# The timeout stops start-up from hanging on a stalled connection, and
# raise_for_status() prevents an HTTP error page from being parsed as if it
# were the catalogue.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, 'html.parser')

# One dict per course: {'title', 'image_url', 'course_link'}.
courses = []

for course_card in soup.find_all('header', class_='course-card__img-container'):
    img_tag = course_card.find('img', class_='course-card__img')
    if not img_tag:
        continue

    title = img_tag.get('alt')
    image_url = img_tag.get('src')

    # Nearest <a> that precedes the card in document order; presumably the
    # anchor wrapping the card -- confirm against the page markup.
    link_tag = course_card.find_previous('a')
    if not link_tag:
        continue

    course_link = link_tag.get('href')

    # Tag.get() returns None for a missing attribute. A card without a title
    # cannot be indexed and one without a link cannot be opened, so skip both
    # instead of crashing on None.lower() / None.startswith() later.
    if not title or not course_link:
        continue

    # Site-relative links are made absolute so they work outside the site.
    if not course_link.startswith('http'):
        course_link = 'https://courses.analyticsvidhya.com' + course_link

    courses.append({
        'title': title,
        'image_url': image_url,
        'course_link': course_link,
    })

# Fail with a clear message rather than an opaque KeyError further down when
# the page layout changed and nothing matched the selectors.
if not courses:
    raise RuntimeError(f"No courses could be scraped from {url}")

df = pd.DataFrame(courses, columns=['title', 'image_url', 'course_link'])
|
|
|
|
|
def preprocess_text(text):
    """Normalise ``text`` for matching.

    The text is lower-cased and every hyphen is replaced by a single space.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string.
    """
    return text.lower().replace("-", " ")
|
|
|
# Normalised titles form the corpus that queries are compared against.
df['processed_title'] = [preprocess_text(raw_title) for raw_title in df['title']]

# Fit the TF-IDF vocabulary on the titles (English stop words removed) and keep
# the resulting document-term matrix for similarity look-ups.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['processed_title'])
|
|
|
def search_courses(query, min_score=0.3, max_results=10):
    """Return the courses whose titles best match ``query``.

    The query is normalised with ``preprocess_text``, projected into the
    fitted TF-IDF space and compared with every course title by cosine
    similarity.

    Args:
        query: Free-text search string.
        min_score: Lowest cosine similarity a course needs to be returned.
        max_results: Maximum number of courses to return (non-negative).

    Returns:
        A list of dicts with the keys ``title``, ``image_url``,
        ``course_link`` and ``score``, ordered from most to least similar.
        The list is empty when the query is blank or nothing reaches
        ``min_score``.
    """
    # A blank query can never match anything; returning early also keeps a
    # None query from crashing inside preprocess_text.
    if not query or not query.strip():
        return []

    query_vector = vectorizer.transform([preprocess_text(query)])
    scores = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # assign() returns a new frame, so the shared module-level ``df`` is never
    # written to and overlapping requests cannot see each other's scores.
    ranked = df.assign(relevance_score=scores)
    ranked = ranked[ranked['relevance_score'] >= min_score]
    # Trim before building the result dicts so only the returned rows are
    # converted.
    ranked = ranked.sort_values(by='relevance_score', ascending=False).head(max_results)

    return [
        {
            'title': title,
            'image_url': image_url,
            'course_link': course_link,
            'score': score,
        }
        for title, image_url, course_link, score in zip(
            ranked['title'],
            ranked['image_url'],
            ranked['course_link'],
            ranked['relevance_score'],
        )
    ]
|
|
|
def _render_course_card(item):
    """Build the HTML card for one search result, escaping every value."""
    # Function-scope import keeps this block self-contained.
    from html import escape

    # The values come from a scraped web page, so a stray quote, "<" or "&"
    # would otherwise break (or inject into) the generated markup. Missing
    # values are rendered as empty strings.
    course_title = escape(str(item['title'] or ''))
    course_image = escape(str(item['image_url'] or ''))
    course_link = escape(str(item['course_link'] or ''))
    relevance_score = round(item['score'] * 100, 2)

    return f'''
    <div class="course-card">
        <img src="{course_image}" alt="{course_title}" class="course-image"/>
        <div class="course-info">
            <h3>{course_title}</h3>
            <p>Relevance: {relevance_score}%</p>
            <a href="{course_link}" target="_blank" class="course-link">View Course</a>
        </div>
    </div>'''


def gradio_search(query):
    """Run a course search and render the matches as an HTML fragment.

    Args:
        query: Search text to pass to ``search_courses``.

    Returns:
        A ``results-container`` div holding one card per match, or a
        ``no-results`` paragraph when nothing matched.
    """
    result_list = search_courses(query)

    if not result_list:
        return '<p class="no-results">No results found. Please try a different query.</p>'

    cards = ''.join(_render_course_card(item) for item in result_list)
    return f'<div class="results-container">{cards}</div>'
|
|
|
|
|
# Dark-theme stylesheet for the search page. The class names used here
# (results-container, course-card, course-image, course-info, course-link,
# no-results) are the ones emitted in the HTML built by gradio_search.
custom_css = """
body {
    font-family: Arial, sans-serif;
    background-color: #121212;
    color: #E0E0E0;
}
.container {
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    color: #E0E0E0;
}
.results-container {
    display: flex;
    flex-wrap: wrap;
    justify-content: space-between;
}
.course-card {
    background-color: #1E1E1E;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
    margin-bottom: 20px;
    overflow: hidden;
    width: 48%;
    transition: transform 0.2s;
}
.course-card:hover {
    transform: translateY(-5px);
}
.course-image {
    width: 100%;
    height: 150px;
    object-fit: cover;
}
.course-info {
    padding: 15px;
}
.course-info h3 {
    margin-top: 0;
    font-size: 18px;
    color: #E0E0E0;
}
.course-info p {
    color: #B0B0B0;
    font-size: 14px;
    margin-bottom: 10px;
}
.course-link {
    display: inline-block;
    background-color: #007bff;
    color: white;
    padding: 8px 12px;
    text-decoration: none;
    border-radius: 4px;
    font-size: 14px;
    transition: background-color 0.2s;
}
.course-link:hover {
    background-color: #0056b3;
}
.no-results {
    text-align: center;
    color: #B0B0B0;
    font-style: italic;
}
"""
|
|
|
|
|
# The widgets and sample queries are named first so the Interface call below
# reads as a short summary of the app.
_query_box = gr.Textbox(
    label="Enter your search query",
    placeholder="e.g., machine learning, data science, python",
)
_results_view = gr.HTML(label="Search Results")
_example_queries = [
    ["machine learning for beginners"],
    ["advanced data visualization techniques"],
    ["python programming basics"],
    ["Business Analytics"],
]

# Single-input, single-output app: the textbox value goes to gradio_search and
# the HTML it returns is shown in the results pane.
# NOTE(review): string theme names may not be accepted by every Gradio
# release -- confirm "huggingface" against the installed version.
iface = gr.Interface(
    fn=gradio_search,
    inputs=_query_box,
    outputs=_results_view,
    title="Analytics Vidhya Smart Course Search",
    description="Find the most relevant courses from Analytics Vidhya based on your query.",
    theme="huggingface",
    css=custom_css,
    examples=_example_queries,
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()