"""Smart course search over Analytics Vidhya's free-course catalogue.

On import the module scrapes the public "all free courses" page, builds a
TF-IDF index over the course titles, and defines a Gradio interface that
ranks courses against a free-text query by cosine similarity.
"""

import html
from urllib.parse import urljoin

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import creds  # Assuming creds.py holds your API key as creds.api_key

# Step 1: Scrape the free courses from Analytics Vidhya
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"

# Without a timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 15


def _scrape_courses(page_url, timeout=REQUEST_TIMEOUT_SECONDS):
    """Return one dict per course card found on *page_url*.

    Each dict has the keys ``title``, ``image_url`` and ``course_link``.
    Cards that lack a title or a link are skipped, because they can be
    neither indexed nor linked to.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(page_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    found = []
    # Extracting course title, image, and course link
    for course_card in soup.find_all('header', class_='course-card__img-container'):
        img_tag = course_card.find('img', class_='course-card__img')
        if img_tag is None:
            continue

        title = img_tag.get('alt')
        if not title:
            continue

        # The enclosing anchor precedes the card header in document order.
        link_tag = course_card.find_previous('a')
        href = link_tag.get('href') if link_tag is not None else None
        if not href:
            continue

        found.append({
            'title': title,
            'image_url': img_tag.get('src'),
            # urljoin leaves absolute URLs untouched and resolves
            # site-relative ones against the catalogue page.
            'course_link': urljoin(page_url, href),
        })
    return found


courses = _scrape_courses(url)
if not courses:
    # Fail with a clear message instead of a KeyError further down.
    raise RuntimeError(
        f"No course cards were found at {url}; the page layout may have changed."
    )

# Step 2: Create DataFrame
df = pd.DataFrame(courses)


# Step 3: Text Processing for Improved Relevance
def preprocess_text(text):
    """Lower-case *text* and replace hyphens with spaces."""
    text = text.lower()
    text = text.replace("-", " ")
    return text


df['processed_title'] = df['title'].apply(preprocess_text)

# Step 4: Generate TF-IDF Vectors for Titles
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['processed_title'])


def search_courses(query, min_score=0.3, top_k=10):
    """Return the courses whose titles best match *query*.

    Args:
        query: Free-text search string.
        min_score: Minimum cosine similarity a course needs to be returned.
        top_k: Maximum number of courses to return.

    Returns:
        A list of dicts with the keys ``title``, ``image_url``,
        ``course_link`` and ``score``, ordered by descending score. The list
        is empty when the query is blank or nothing reaches *min_score*.
    """
    if not query or not query.strip():
        return []

    # Process query and generate its TF-IDF vector
    query_vector = vectorizer.transform([preprocess_text(query)])

    # Calculate cosine similarity
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Score a per-call copy: writing the column into the shared module-level
    # frame would let concurrent requests overwrite each other's scores.
    scored = df.assign(relevance_score=similarities)

    # Filter and sort courses based on relevance score
    top = (
        scored[scored['relevance_score'] >= min_score]
        .sort_values(by='relevance_score', ascending=False)
        .head(top_k)
    )
    return [
        {
            'title': row.title,
            'image_url': row.image_url,
            'course_link': row.course_link,
            'score': float(row.relevance_score),
        }
        for row in top.itertuples(index=False)
    ]


def _render_course_card(course):
    """Render one search result as an HTML card styled by ``custom_css``."""
    # Titles and URLs come from a scraped page, so escape them before
    # interpolating into markup.
    title = html.escape(str(course['title']))
    image_url = html.escape(str(course['image_url'] or ''))
    course_link = html.escape(str(course['course_link']))
    return (
        '<div class="course-card">'
        f'<img src="{image_url}" alt="{title}" class="course-image">'
        '<div class="course-info">'
        f'<h3>{title}</h3>'
        f'<p>Relevance: {course["score"]:.0%}</p>'
        f'<a href="{course_link}" target="_blank" rel="noopener noreferrer" '
        'class="course-link">View Course</a>'
        '</div>'
        '</div>'
    )


def gradio_search(query):
    """Run a search and return the results as an HTML fragment for Gradio."""
    # NOTE(review): the tag structure below was rebuilt from the class names
    # used in custom_css; confirm it against the intended card layout.
    result_list = search_courses(query)
    if not result_list:
        return '<p class="no-results">No results found. Please try a different query.</p>'

    cards = ''.join(_render_course_card(item) for item in result_list)
    return f'<div class="results-container">{cards}</div>'


# Dark-themed CSS
custom_css = """
body {
    font-family: Arial, sans-serif;
    background-color: #121212;
    color: #E0E0E0;
}
.container {
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    color: #E0E0E0;
}
.results-container {
    display: flex;
    flex-wrap: wrap;
    justify-content: space-between;
}
.course-card {
    background-color: #1E1E1E;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
    margin-bottom: 20px;
    overflow: hidden;
    width: 48%;
    transition: transform 0.2s;
}
.course-card:hover {
    transform: translateY(-5px);
}
.course-image {
    width: 100%;
    height: 150px;
    object-fit: cover;
}
.course-info {
    padding: 15px;
}
.course-info h3 {
    margin-top: 0;
    font-size: 18px;
    color: #E0E0E0;
}
.course-info p {
    color: #B0B0B0;
    font-size: 14px;
    margin-bottom: 10px;
}
.course-link {
    display: inline-block;
    background-color: #007bff;
    color: white;
    padding: 8px 12px;
    text-decoration: none;
    border-radius: 4px;
    font-size: 14px;
    transition: background-color 0.2s;
}
.course-link:hover {
    background-color: #0056b3;
}
.no-results {
    text-align: center;
    color: #B0B0B0;
    font-style: italic;
}
"""

# Gradio interface
iface = gr.Interface(
    fn=gradio_search,
    inputs=gr.Textbox(
        label="Enter your search query",
        placeholder="e.g., machine learning, data science, python",
    ),
    outputs=gr.HTML(label="Search Results"),
    title="Analytics Vidhya Smart Course Search",
    description="Find the most relevant courses from Analytics Vidhya based on your query.",
    # NOTE(review): confirm the installed Gradio release still recognises
    # this theme name.
    theme="huggingface",
    css=custom_css,
    examples=[
        ["machine learning for beginners"],
        ["advanced data visualization techniques"],
        ["python programming basics"],
        ["Business Analytics"],
    ],
)

if __name__ == "__main__":
    iface.launch()