aryan79 committed on
Commit
3cc36a9
·
verified ·
1 Parent(s): 9c7b5e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -42
app.py CHANGED
@@ -4,6 +4,8 @@ import pandas as pd
4
  import gradio as gr
5
  from sklearn.feature_extraction.text import TfidfVectorizer
6
  from sklearn.metrics.pairwise import cosine_similarity
 
 
7
  import creds # Assuming creds.py holds your API key as creds.api_key
8
 
9
  # Step 1: Scrape the free courses from Analytics Vidhya
@@ -36,40 +38,42 @@ for course_card in soup.find_all('header', class_='course-card__img-container'):
36
  # Step 2: Create DataFrame
37
  df = pd.DataFrame(courses)
38
 
39
- # Step 3: Text Processing for Improved Relevance
40
def preprocess_text(text):
    """Normalize a course title or search query before TF-IDF vectorization.

    The text is lower-cased and hyphens are replaced with spaces, so that
    e.g. "Machine-Learning" and "machine learning" tokenize identically.

    Args:
        text: The raw title or query. Non-string values (``None``, NaN
            produced by a failed scrape, ...) are treated as empty text
            instead of raising ``AttributeError``.

    Returns:
        The normalized string, or ``""`` when *text* is not a string.
    """
    if not isinstance(text, str):
        return ""
    return text.lower().replace("-", " ")
44
 
45
- df['processed_title'] = df['title'].apply(preprocess_text)
 
 
 
 
46
 
47
- # Step 4: Generate TF-IDF Vectors for Titles
48
- vectorizer = TfidfVectorizer(stop_words='english')
49
- tfidf_matrix = vectorizer.fit_transform(df['processed_title'])
50
 
51
def search_courses(query):
    """Return up to ten courses whose titles best match *query*.

    The query is normalized with ``preprocess_text``, projected into the
    TF-IDF space fitted on the course titles (module-level ``vectorizer`` /
    ``tfidf_matrix``), and compared against every title with cosine
    similarity.

    Args:
        query: Free-text search string.

    Returns:
        A list of at most 10 dicts with keys ``title``, ``image_url``,
        ``course_link`` and ``score`` (cosine similarity in ``[0, 1]``),
        sorted by descending score. Only courses scoring at least 0.3 are
        included.
    """
    # Process query and generate its TF-IDF vector
    processed_query = preprocess_text(query)
    query_vector = vectorizer.transform([processed_query])

    # Calculate cosine similarity between the query and every course title
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Attach scores to a copy: writing the column into the shared
    # module-level ``df`` would leak per-query state between requests.
    scored = df.assign(relevance_score=similarities)

    # Filter, sort and truncate before building dicts so at most 10 rows
    # are materialized.
    top = (
        scored[scored['relevance_score'] >= 0.3]
        .sort_values(by='relevance_score', ascending=False)
        .head(10)
    )

    return [
        {
            'title': title,
            'image_url': image_url,
            'course_link': course_link,
            'score': score,
        }
        for title, image_url, course_link, score in zip(
            top['title'],
            top['image_url'],
            top['course_link'],
            top['relevance_score'],
        )
    ]
 
73
 
74
  def gradio_search(query):
75
  result_list = search_courses(query)
@@ -80,7 +84,7 @@ def gradio_search(query):
80
  course_title = item['title']
81
  course_image = item['image_url']
82
  course_link = item['course_link']
83
- relevance_score = round(item['score'] * 100, 2)
84
 
85
  html_output += f'''
86
  <div class="course-card">
@@ -100,8 +104,8 @@ def gradio_search(query):
100
  custom_css = """
101
  body {
102
  font-family: Arial, sans-serif;
103
- background-color: #121212;
104
- color: #E0E0E0;
105
  }
106
  .container {
107
  max-width: 800px;
@@ -115,7 +119,7 @@ body {
115
  justify-content: space-between;
116
  }
117
  .course-card {
118
- background-color: #1E1E1E;
119
  border-radius: 8px;
120
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
121
  margin-bottom: 20px;
@@ -137,10 +141,10 @@ body {
137
  .course-info h3 {
138
  margin-top: 0;
139
  font-size: 18px;
140
- color: #E0E0E0;
141
  }
142
  .course-info p {
143
- color: #B0B0B0;
144
  font-size: 14px;
145
  margin-bottom: 10px;
146
  }
@@ -173,11 +177,10 @@ iface = gr.Interface(
173
  description="Find the most relevant courses from Analytics Vidhya based on your query.",
174
  theme="huggingface",
175
  css=custom_css,
176
- examples=[
177
- ["machine learning for beginners"],
178
- ["advanced data visualization techniques"],
179
- ["python programming basics"],
180
- ["Business Analytics"]
181
  ],
182
  )
183
 
 
4
  import gradio as gr
5
  from sklearn.feature_extraction.text import TfidfVectorizer
6
  from sklearn.metrics.pairwise import cosine_similarity
7
+ import os
8
+ from groq import Groq
9
  import creds # Assuming creds.py holds your API key as creds.api_key
10
 
11
  # Step 1: Scrape the free courses from Analytics Vidhya
 
38
  # Step 2: Create DataFrame
39
  df = pd.DataFrame(courses)
40
 
41
+ # Step 3: Initialize the Groq client and set the API key
42
+ client = Groq(api_key=creds.api_key)
 
 
 
43
 
44
def search_courses(query, *, top_k=10, min_relevance=0.5):
    """Return the courses whose titles are most similar to *query*.

    The course titles from the module-level ``df`` plus the query itself are
    vectorized together with TF-IDF (English stop words removed), and the
    query vector is compared to every title with cosine similarity. Note
    that the vectorizer is re-fitted on every call because the query is part
    of the fitted corpus.

    Args:
        query: Free-text search string.
        top_k: Maximum number of courses to return (default 10).
        min_relevance: Minimum cosine similarity, in ``[0, 1]``, a course
            must reach to be returned (default 0.5).

    Returns:
        A list of dicts with keys ``title``, ``image_url``, ``course_link``
        and ``score`` (similarity as a percentage rounded to two decimals),
        ordered by descending score. The list is empty when nothing matches,
        when the query is blank, or when the search fails.
    """
    import logging

    # Nothing sensible to search for / in.
    if not isinstance(query, str) or not query.strip():
        return []
    if top_k <= 0 or df.empty:
        return []

    try:
        # The query goes last so it can be sliced off the matrix below.
        corpus = df['title'].tolist()
        corpus.append(query)

        tfidf_matrix = TfidfVectorizer(stop_words='english').fit_transform(corpus)

        # Last row is the query; all preceding rows are the course titles.
        similarities = cosine_similarity(
            tfidf_matrix[-1:], tfidf_matrix[:-1]
        ).flatten()

        # Indices of the best matches, highest similarity first.
        ranked = similarities.argsort()[::-1][:top_k]

        results = []
        for index in ranked:
            relevance = similarities[index]
            if relevance < min_relevance:
                # Ranked in descending order, so nothing further qualifies.
                break
            course = df.iloc[index]
            results.append({
                'title': course['title'],
                'image_url': course['image_url'],
                'course_link': course['course_link'],
                # Show relevance as a percentage
                'score': round(float(relevance) * 100, 2),
            })
        return results

    except Exception:
        # Best-effort boundary for the UI: keep returning an empty result,
        # but record the failure instead of hiding it.
        logging.getLogger(__name__).exception(
            "Course search failed for query %r", query
        )
        return []
77
 
78
  def gradio_search(query):
79
  result_list = search_courses(query)
 
84
  course_title = item['title']
85
  course_image = item['image_url']
86
  course_link = item['course_link']
87
+ relevance_score = item['score']
88
 
89
  html_output += f'''
90
  <div class="course-card">
 
104
  custom_css = """
105
  body {
106
  font-family: Arial, sans-serif;
107
+ background-color: #121212; /* Dark background */
108
+ color: #E0E0E0; /* Light text color for dark background */
109
  }
110
  .container {
111
  max-width: 800px;
 
119
  justify-content: space-between;
120
  }
121
  .course-card {
122
+ background-color: #1E1E1E; /* Darker card background */
123
  border-radius: 8px;
124
  box-shadow: 0 2px 4px rgba(0, 0, 0, 0.5);
125
  margin-bottom: 20px;
 
141
  .course-info h3 {
142
  margin-top: 0;
143
  font-size: 18px;
144
+ color: #E0E0E0; /* Light text color */
145
  }
146
  .course-info p {
147
+ color: #B0B0B0; /* Slightly darker text color for contrast */
148
  font-size: 14px;
149
  margin-bottom: 10px;
150
  }
 
177
  description="Find the most relevant courses from Analytics Vidhya based on your query.",
178
  theme="huggingface",
179
  css=custom_css,
180
+ examples=[["machine learning for beginners"],
181
+ ["advanced data visualization techniques"],
182
+ ["python programming basics"],
183
+ ["Business Analytics"]
 
184
  ],
185
  )
186