aryan79 committed on
Commit
741b34d
·
verified ·
1 Parent(s): 6ebde36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -46
app.py CHANGED
@@ -2,45 +2,39 @@ import requests
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import gradio as gr
 
5
  from groq import Groq
6
  import creds # Assuming creds.py holds your API key as creds.api_key
7
 
8
  # Step 1: Scrape the free courses from Analytics Vidhya
9
- def scrape_courses():
10
- url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
11
- try:
12
- response = requests.get(url)
13
- response.raise_for_status() # Raise an error for bad status codes
14
- except requests.RequestException as e:
15
- print(f"Error fetching data: {e}")
16
- return []
17
 
18
- soup = BeautifulSoup(response.content, 'html.parser')
19
- courses = []
20
 
21
- # Extracting course title, image, and course link
22
- for course_card in soup.find_all('header', class_='course-card__img-container'):
23
- img_tag = course_card.find('img', class_='course-card__img')
 
 
 
 
24
 
25
- if img_tag:
26
- title = img_tag.get('alt')
27
- image_url = img_tag.get('src')
28
-
29
- link_tag = course_card.find_previous('a')
30
- if link_tag:
31
- course_link = link_tag.get('href')
32
- if not course_link.startswith('http'):
33
- course_link = 'https://courses.analyticsvidhya.com' + course_link
34
-
35
- courses.append({
36
- 'title': title,
37
- 'image_url': image_url,
38
- 'course_link': course_link
39
- })
40
- return courses
41
 
42
  # Step 2: Create DataFrame
43
- df = pd.DataFrame(scrape_courses())
44
 
45
  # Step 3: Initialize the Groq client and set the API key
46
  client = Groq(api_key=creds.api_key) # Properly passing the API key
@@ -50,6 +44,9 @@ def search_courses(query):
50
  print(f"Searching for: {query}")
51
  print(f"Number of courses in database: {len(df)}")
52
 
 
 
 
53
  # Prepare the prompt for Groq
54
  prompt = f"""Given the following query: "{query}"
55
  Please analyze the query and rank the following courses based on their relevance to the query.
@@ -83,22 +80,27 @@ def search_courses(query):
83
  if line.startswith('Title:'):
84
  title = line.split('Title:')[1].strip()
85
  print(f"Found title: {title}")
86
- elif line.startswith('Relevance:'):
87
- relevance = float(line.split('Relevance:')[1].strip())
88
- print(f"Relevance for {title}: {relevance}")
89
- if relevance >= 0.5:
90
- matching_courses = df[df['title'] == title]
91
- if not matching_courses.empty:
92
- course = matching_courses.iloc[0]
93
- results.append({
94
- 'title': title,
95
- 'image_url': course['image_url'],
96
- 'course_link': course['course_link'],
97
- 'score': relevance
98
- })
99
- print(f"Added course: {title}")
100
- else:
101
- print(f"Warning: Course not found in database: {title}")
 
 
 
 
 
102
 
103
  print(f"Number of results found: {len(results)}")
104
  return sorted(results, key=lambda x: x['score'], reverse=True)[:10] # Return top 10 results
 
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import gradio as gr
5
+ import os
6
  from groq import Groq
7
  import creds # Assuming creds.py holds your API key as creds.api_key
8
 
9
  # Step 1: Scrape the free courses from Analytics Vidhya
10
+ url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
11
+ response = requests.get(url)
12
+ soup = BeautifulSoup(response.content, 'html.parser')
 
 
 
 
 
13
 
14
+ courses = []
 
15
 
16
+ # Extracting course title, image, and course link
17
+ for course_card in soup.find_all('header', class_='course-card__img-container'):
18
+ img_tag = course_card.find('img', class_='course-card__img')
19
+
20
+ if img_tag:
21
+ title = img_tag.get('alt')
22
+ image_url = img_tag.get('src')
23
 
24
+ link_tag = course_card.find_previous('a')
25
+ if link_tag:
26
+ course_link = link_tag.get('href')
27
+ if not course_link.startswith('http'):
28
+ course_link = 'https://courses.analyticsvidhya.com' + course_link
29
+
30
+ courses.append({
31
+ 'title': title,
32
+ 'image_url': image_url,
33
+ 'course_link': course_link
34
+ })
 
 
 
 
 
35
 
36
  # Step 2: Create DataFrame
37
+ df = pd.DataFrame(courses)
38
 
39
  # Step 3: Initialize the Groq client and set the API key
40
  client = Groq(api_key=creds.api_key) # Properly passing the API key
 
44
  print(f"Searching for: {query}")
45
  print(f"Number of courses in database: {len(df)}")
46
 
47
+ # Normalize the query to lowercase for case-insensitive comparison
48
+ normalized_query = query.lower()
49
+
50
  # Prepare the prompt for Groq
51
  prompt = f"""Given the following query: "{query}"
52
  Please analyze the query and rank the following courses based on their relevance to the query.
 
80
  if line.startswith('Title:'):
81
  title = line.split('Title:')[1].strip()
82
  print(f"Found title: {title}")
83
+
84
+ # Normalize the title to lowercase for comparison
85
+ normalized_title = title.lower()
86
+
87
+ # Check if the normalized title contains the normalized query
88
+ if normalized_query in normalized_title:
89
+ relevance = float(line.split('Relevance:')[1].strip())
90
+ print(f"Relevance for {title}: {relevance}")
91
+ if relevance >= 0.5:
92
+ matching_courses = df[df['title'] == title]
93
+ if not matching_courses.empty:
94
+ course = matching_courses.iloc[0]
95
+ results.append({
96
+ 'title': title,
97
+ 'image_url': course['image_url'],
98
+ 'course_link': course['course_link'],
99
+ 'score': relevance
100
+ })
101
+ print(f"Added course: {title}")
102
+ else:
103
+ print(f"Warning: Course not found in database: {title}")
104
 
105
  print(f"Number of results found: {len(results)}")
106
  return sorted(results, key=lambda x: x['score'], reverse=True)[:10] # Return top 10 results