aryan79 committed on
Commit
e8ab094
·
verified ·
1 Parent(s): 741b34d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -48
app.py CHANGED
@@ -2,39 +2,45 @@ import requests
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import gradio as gr
5
- import os
6
  from groq import Groq
7
  import creds # Assuming creds.py holds your API key as creds.api_key
8
 
9
  # Step 1: Scrape the free courses from Analytics Vidhya
10
- url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
11
- response = requests.get(url)
12
- soup = BeautifulSoup(response.content, 'html.parser')
 
 
 
 
 
13
 
14
- courses = []
 
15
 
16
- # Extracting course title, image, and course link
17
- for course_card in soup.find_all('header', class_='course-card__img-container'):
18
- img_tag = course_card.find('img', class_='course-card__img')
19
-
20
- if img_tag:
21
- title = img_tag.get('alt')
22
- image_url = img_tag.get('src')
23
 
24
- link_tag = course_card.find_previous('a')
25
- if link_tag:
26
- course_link = link_tag.get('href')
27
- if not course_link.startswith('http'):
28
- course_link = 'https://courses.analyticsvidhya.com' + course_link
29
-
30
- courses.append({
31
- 'title': title,
32
- 'image_url': image_url,
33
- 'course_link': course_link
34
- })
 
 
 
 
 
35
 
36
  # Step 2: Create DataFrame
37
- df = pd.DataFrame(courses)
38
 
39
  # Step 3: Initialize the Groq client and set the API key
40
  client = Groq(api_key=creds.api_key) # Properly passing the API key
@@ -44,9 +50,6 @@ def search_courses(query):
44
  print(f"Searching for: {query}")
45
  print(f"Number of courses in database: {len(df)}")
46
 
47
- # Normalize the query to lowercase for case-insensitive comparison
48
- normalized_query = query.lower()
49
-
50
  # Prepare the prompt for Groq
51
  prompt = f"""Given the following query: "{query}"
52
  Please analyze the query and rank the following courses based on their relevance to the query.
@@ -80,27 +83,22 @@ def search_courses(query):
80
  if line.startswith('Title:'):
81
  title = line.split('Title:')[1].strip()
82
  print(f"Found title: {title}")
83
-
84
- # Normalize the title to lowercase for comparison
85
- normalized_title = title.lower()
86
-
87
- # Check if the normalized title contains the normalized query
88
- if normalized_query in normalized_title:
89
- relevance = float(line.split('Relevance:')[1].strip())
90
- print(f"Relevance for {title}: {relevance}")
91
- if relevance >= 0.5:
92
- matching_courses = df[df['title'] == title]
93
- if not matching_courses.empty:
94
- course = matching_courses.iloc[0]
95
- results.append({
96
- 'title': title,
97
- 'image_url': course['image_url'],
98
- 'course_link': course['course_link'],
99
- 'score': relevance
100
- })
101
- print(f"Added course: {title}")
102
- else:
103
- print(f"Warning: Course not found in database: {title}")
104
 
105
  print(f"Number of results found: {len(results)}")
106
  return sorted(results, key=lambda x: x['score'], reverse=True)[:10] # Return top 10 results
 
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import gradio as gr
 
5
  from groq import Groq
6
  import creds # Assuming creds.py holds your API key as creds.api_key
7
 
8
  # Step 1: Scrape the free courses from Analytics Vidhya
9
def scrape_courses():
    """Scrape the free-course listing page of Analytics Vidhya.

    Returns:
        list[dict]: One dict per course card with the keys ``title``,
        ``image_url`` and ``course_link``. An empty list is returned when
        the page cannot be fetched (the error is printed, not raised).
    """
    base_url = "https://courses.analyticsvidhya.com"
    url = "https://courses.analyticsvidhya.com/pages/all-free-courses"
    try:
        # requests applies no timeout by default; without one a stalled
        # server would block application start-up indefinitely. A timeout
        # raises requests.Timeout, which is a RequestException.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an error for bad status codes
    except requests.RequestException as e:
        print(f"Error fetching data: {e}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    courses = []

    # Extracting course title, image, and course link
    for course_card in soup.find_all('header', class_='course-card__img-container'):
        img_tag = course_card.find('img', class_='course-card__img')
        if not img_tag:
            continue

        # The card's anchor is looked up as the nearest <a> preceding the
        # image container in the document.
        link_tag = course_card.find_previous('a')
        course_link = link_tag.get('href') if link_tag else None
        if not course_link:
            # No usable link: skip the card instead of failing on an
            # unbound/None value or reusing the previous card's link.
            continue
        if not course_link.startswith('http'):
            course_link = base_url + course_link

        courses.append({
            'title': img_tag.get('alt'),
            'image_url': img_tag.get('src'),
            'course_link': course_link,
        })
    return courses
41
 
42
  # Step 2: Create DataFrame
43
# Pin the column schema so that an empty scrape result (scrape_courses()
# returns [] when the page cannot be fetched) still yields a DataFrame with
# a 'title' column; otherwise later df['title'] lookups raise KeyError.
df = pd.DataFrame(scrape_courses(), columns=['title', 'image_url', 'course_link'])
44
 
45
  # Step 3: Initialize the Groq client and set the API key
46
  client = Groq(api_key=creds.api_key) # Properly passing the API key
 
50
  print(f"Searching for: {query}")
51
  print(f"Number of courses in database: {len(df)}")
52
 
 
 
 
53
  # Prepare the prompt for Groq
54
  prompt = f"""Given the following query: "{query}"
55
  Please analyze the query and rank the following courses based on their relevance to the query.
 
83
  if line.startswith('Title:'):
84
  title = line.split('Title:')[1].strip()
85
  print(f"Found title: {title}")
86
+ elif line.startswith('Relevance:'):
87
+ relevance = float(line.split('Relevance:')[1].strip())
88
+ print(f"Relevance for {title}: {relevance}")
89
+ if relevance >= 0.5:
90
+ matching_courses = df[df['title'] == title]
91
+ if not matching_courses.empty:
92
+ course = matching_courses.iloc[0]
93
+ results.append({
94
+ 'title': title,
95
+ 'image_url': course['image_url'],
96
+ 'course_link': course['course_link'],
97
+ 'score': relevance
98
+ })
99
+ print(f"Added course: {title}")
100
+ else:
101
+ print(f"Warning: Course not found in database: {title}")
 
 
 
 
 
102
 
103
  print(f"Number of results found: {len(results)}")
104
  return sorted(results, key=lambda x: x['score'], reverse=True)[:10] # Return top 10 results