"""Smart course search over the Analytics Vidhya free-course catalogue.

At import time the module scrapes the public "all free courses" page, stores
the title / image / link of every course card in a DataFrame, and builds a
Gradio interface that ranks those courses against a free-text query using
TF-IDF cosine similarity on the course titles.
"""
import html
import logging
import os
from urllib.parse import urljoin

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from groq import Groq
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import creds  # Assuming creds.py holds your API key as creds.api_key

logger = logging.getLogger(__name__)

# Seconds to wait for the catalogue page before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30


def _extract_courses(page_soup, page_url):
    """Return one dict per course card found in *page_soup*.

    Args:
        page_soup: Parsed catalogue page (BeautifulSoup document).
        page_url: URL the page was fetched from; relative course links are
            resolved against it.

    Returns:
        A list of dicts with the keys 'title', 'image_url' and 'course_link'.
        Cards without an image, a title (the image's ``alt`` text) or a link
        are skipped, because a missing title would break the TF-IDF
        vectorizer and a missing link cannot be rendered.
    """
    found = []
    for course_card in page_soup.find_all('header', class_='course-card__img-container'):
        img_tag = course_card.find('img', class_='course-card__img')
        if not img_tag:
            continue
        title = img_tag.get('alt')
        if not title:
            continue
        # The anchor wrapping the card precedes the header in document order.
        link_tag = course_card.find_previous('a')
        href = link_tag.get('href') if link_tag else None
        if not href:
            continue
        found.append({
            'title': title,
            'image_url': img_tag.get('src'),
            # urljoin leaves absolute URLs untouched and resolves relative ones.
            'course_link': urljoin(page_url, href),
        })
    return found


# Step 1: Scrape the free courses from Analytics Vidhya
url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty catalogue that silently yields "no results" for every query.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Extracting course title, image, and course link
courses = _extract_courses(soup, url)

# Step 2: Create DataFrame
df = pd.DataFrame(courses)

# Step 3: Initialize the Groq client and set the API key
# NOTE(review): the client is not used anywhere in this file; it is kept so
# the module's import-time behaviour is unchanged.
client = Groq(api_key=creds.api_key)


def search_courses(query, *, top_k=10, min_relevance=0.5):
    """Rank scraped courses by TF-IDF cosine similarity to *query*.

    Args:
        query: Free-text search string.
        top_k: Maximum number of courses to consider (best matches first).
        min_relevance: Minimum cosine similarity, in the range 0..1, that a
            course needs in order to be returned.

    Returns:
        A list of dicts with the keys 'title', 'image_url', 'course_link' and
        'score' (similarity as a percentage rounded to two decimals), ordered
        from most to least relevant. The list is empty when the query is
        blank, no courses were scraped, nothing reaches *min_relevance*, or
        vectorization fails (the failure is logged).
    """
    if not isinstance(query, str) or not query.strip() or df.empty:
        return []

    try:
        # Step 4: The query is vectorized together with the titles so both
        # share a single vocabulary; it is the last row of the matrix.
        documents = df['title'].tolist()
        documents.append(query)
        vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = vectorizer.fit_transform(documents)
        similarities = cosine_similarity(tfidf_matrix[-1:], tfidf_matrix[:-1]).flatten()
    except Exception:
        # Best effort: the UI shows "no results" rather than an error, but
        # the cause is recorded instead of being discarded.
        logger.exception("Course search failed for query %r", query)
        return []

    # Indices of the best matches, highest similarity first.
    ranked_indices = similarities.argsort()[::-1][:max(top_k, 0)]

    # Step 5: Build results
    results = []
    for index in ranked_indices:
        relevance = float(similarities[index])
        if relevance < min_relevance:
            break  # sorted descending, so every remaining course scores lower
        course = df.iloc[index]
        results.append({
            'title': course['title'],
            'image_url': course['image_url'],
            'course_link': course['course_link'],
            'score': round(relevance * 100, 2),  # Show relevance as percentage
        })
    return results


def gradio_search(query):
    """Run a course search and render the matches as an HTML fragment.

    Args:
        query: Free-text search string entered in the Gradio textbox.

    Returns:
        HTML containing one card per matching course, or a "no results"
        message when nothing matched.

    NOTE(review): the markup was reconstructed from the class names defined
    in ``custom_css`` because the original tags were lost; confirm it against
    the intended layout.
    """
    result_list = search_courses(query)
    if not result_list:
        return '''
        <div class="no-results">
            <p>No results found. Please try a different query.</p>
        </div>
        '''

    cards = []
    for item in result_list:
        # Titles and URLs come from a scraped third-party page, so they are
        # escaped before being placed in markup and attribute values.
        course_title = html.escape(str(item['title']))
        course_image = html.escape(str(item['image_url'] or ''))
        course_link = html.escape(str(item['course_link']))
        relevance_score = item['score']
        cards.append(f'''
        <div class="course-card">
            <img src="{course_image}" alt="{course_title}" class="course-image">
            <div class="course-info">
                <h3>{course_title}</h3>
                <p>Relevance: {relevance_score}%</p>
                <a href="{course_link}" target="_blank" rel="noopener noreferrer" class="course-link">View Course</a>
            </div>
        </div>
        ''')
    return '<div class="results-container">' + ''.join(cards) + '</div>'


# Dark-themed CSS
custom_css = """
body {
    font-family: Arial, sans-serif;
    background-color: #121212; /* Dark background */
    color: #E0E0E0; /* Light text color for dark background */
}
.container {
    max-width: 800px;
    margin: 0 auto;
    padding: 20px;
    color: #E0E0E0;
}
.results-container {
    display: flex;
    flex-wrap: wrap;
    justify-content: space-between;
}
.course-card {
    background-color: #1E1E1E; /* Darker card background */
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
    margin-bottom: 20px;
    overflow: hidden;
    width: 48%;
    transition: transform 0.2s;
}
.course-card:hover {
    transform: translateY(-5px);
}
.course-image {
    width: 100%;
    height: 150px;
    object-fit: cover;
}
.course-info {
    padding: 15px;
}
.course-info h3 {
    margin-top: 0;
    font-size: 18px;
    color: #E0E0E0; /* Light text color */
}
.course-info p {
    color: #B0B0B0; /* Slightly darker text color for contrast */
    font-size: 14px;
    margin-bottom: 10px;
}
.course-link {
    display: inline-block;
    background-color: #007bff;
    color: white;
    padding: 8px 12px;
    text-decoration: none;
    border-radius: 4px;
    font-size: 14px;
    transition: background-color 0.2s;
}
.course-link:hover {
    background-color: #0056b3;
}
.no-results {
    text-align: center;
    color: #B0B0B0;
    font-style: italic;
}
"""

# Gradio interface
iface = gr.Interface(
    fn=gradio_search,
    inputs=gr.Textbox(
        label="Enter your search query",
        placeholder="e.g., machine learning, data science, python",
    ),
    outputs=gr.HTML(label="Search Results"),
    title="Analytics Vidhya Smart Course Search",
    description="Find the most relevant courses from Analytics Vidhya based on your query.",
    theme="huggingface",
    css=custom_css,
    examples=[
        ["machine learning for beginners"],
        ["advanced data visualization techniques"],
        ["python programming basics"],
        ["Business Analytics"],
    ],
)

if __name__ == "__main__":
    iface.launch()