# kmrmanish's picture
# history blame
# 2.75 kB
import streamlit as st
import difflib
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Download NLTK stopwords if not already done
nltk.download('stopwords')

# Read the data
lpi_df = pd.read_csv('Learning Pathway Index.csv')

# Rename columns
# Give the verbose spreadsheet headers short, identifier-like names so the
# rest of the script can refer to them directly. Columns not listed here
# keep their original names.
lpi_df = lpi_df.rename(columns={
    "Course / Learning material": "Course_Learning_Material",
    "Course Level": "Course_Level",
    "Type (Free or Paid)": "Type",
    "Module / Sub-module \nDifficulty level": "Difficulty_Level",
    "Keywords / Tags / Skills / Interests / Categories": "Keywords",
})
# Combine features
# Join the descriptive text columns into one space-separated string per row.
# Missing cells are replaced with '' first: concatenating a string with NaN
# yields NaN, which would turn the whole combined value into NaN and make the
# regex-based text cleaning fail on a non-string value.
_feature_columns = [
    'Course_Learning_Material',
    'Source',
    'Course_Level',
    'Type',
    'Module',
    'Difficulty_Level',
    'Keywords',
]
_feature_text = lpi_df[_feature_columns].fillna('').astype(str)
lpi_df['combined_features'] = _feature_text[_feature_columns[0]].str.cat(
    _feature_text[_feature_columns[1:]], sep=' '
)
# Text preprocessing
combined_features = lpi_df['combined_features']
porter_stemmer = PorterStemmer()

# Build the stopword lookup once. stopwords.words() returns a new list on
# every call and list membership is a linear scan, so calling it for each
# token is needlessly slow; a frozenset gives constant-time lookups.
_ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))


def stemming(content):
    """Normalise a text string before TF-IDF vectorisation.

    Every non-letter character is replaced with a space, the text is
    lower-cased and split on whitespace, English stopwords are dropped and
    each remaining token is reduced to its Porter stem.

    Args:
        content: The raw text to clean (must be a string).

    Returns:
        The stemmed tokens joined by single spaces.
    """
    letters_only = re.sub(r'[^a-zA-Z]', ' ', content)
    tokens = letters_only.lower().split()
    stemmed_tokens = [
        porter_stemmer.stem(word)
        for word in tokens
        if word not in _ENGLISH_STOPWORDS
    ]
    return ' '.join(stemmed_tokens)


combined_features = combined_features.apply(stemming)
# TF-IDF and similarity
vectorizer = TfidfVectorizer()
# fit_transform learns the vocabulary and IDF weights from the corpus and
# returns the document-term matrix in one step. Calling transform() on a
# vectorizer that has never been fitted raises NotFittedError.
combined_features = vectorizer.fit_transform(combined_features)
# Cosine similarity between every pair of rows; row i holds the scores of
# course i against all courses.
similarity = cosine_similarity(combined_features)
# Streamlit app
# Ask for a free-text topic, fuzzy-match it against the module titles, then
# list the 30 courses whose combined text is most similar to the matched one.
st.title('Learning Pathway Index Course Recommendation')
user_input = st.text_input('Enter What You Want to Learn : ')

if user_input:
    list_of_all_titles = lpi_df['Module'].tolist()
    # difflib can only compare strings, so leave out missing (NaN) titles
    # when searching; the full list is kept for positional lookups below.
    candidate_titles = [
        title for title in list_of_all_titles if isinstance(title, str)
    ]
    find_close_match = difflib.get_close_matches(user_input, candidate_titles)

    if find_close_match:
        close_match = find_close_match[0]
        # Position of the first row with that title. Rows of `similarity`
        # are positional, so use a positional index rather than a label.
        index_of_the_course = list_of_all_titles.index(close_match)
        similarity_score = list(enumerate(similarity[index_of_the_course]))
        # Highest similarity first; the matched course itself ranks at the
        # top because its similarity to itself is the maximum.
        sorted_similar_course = sorted(
            similarity_score, key=lambda x: x[1], reverse=True
        )

        st.subheader('Courses suggested for you:')
        for i, course in enumerate(sorted_similar_course[:30], start=1):
            index = course[0]
            title_from_index = list_of_all_titles[index]
            st.write(f"{i}. {title_from_index}")
    else:
        # Shown once, and only when the fuzzy search found nothing. A
        # separate emptiness check on the ranked list is unnecessary: once a
        # match exists the similarity row always has at least one entry.
        st.write('No close matches found.')