kmrmanish's picture
Update app.py
27a6856
raw
history blame
2.56 kB
import streamlit as st
import difflib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
# Suppress every warning category for the whole process so that warnings
# raised by the libraries used below are not emitted while the app runs.
warnings.filterwarnings("ignore")
# NOTE: no `%matplotlib inline` here. IPython magics are notebook-only syntax
# and raise SyntaxError when this file is run as a plain script / Streamlit app.
# for text data preprocessing
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from wordcloud import WordCloud
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Load the course catalogue and build one free-text field per row; that field
# is what the TF-IDF vectoriser further down is fitted on.
lpi_df = pd.read_csv('Learning_Pathway_Index.csv')

# Columns whose text is concatenated, space-separated, in this order.
feature_columns = [
    'Course_Learning_Material',
    'Source',
    'Course_Level',
    'Type',
    'Module',
    'Difficulty_Level',
    'Keywords_Tags_Skills_Interests_Categories',
]

# Missing cells become '' and every cell is coerced to str before joining:
# with plain `+`, a single NaN turns the whole concatenated value into NaN,
# which is not a string and makes the regex clean-up in stemming() fail.
feature_text = lpi_df[feature_columns].fillna('').astype(str)
lpi_df['combined_features'] = feature_text[feature_columns[0]].str.cat(
    feature_text[feature_columns[1:]], sep=' '
)
combined_features = lpi_df['combined_features']

# Single stemmer instance shared by every stemming() call.
porter_stemmer = PorterStemmer()
# English stop words, loaded once into a set. Calling stopwords.words() inside
# the per-word filter would rebuild the word list and scan it linearly for
# every single token of every row.
_ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))


def stemming(content):
    """Return *content* reduced to lower-case, stop-word-free word stems.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lower-cased and split on whitespace, English stop words are
    dropped, and each remaining word is stemmed with the module-level
    ``porter_stemmer``.

    Args:
        content: The text to normalise; must be a ``str``.

    Returns:
        The surviving stems joined by single spaces (``''`` if none remain).
    """
    letters_only = re.sub(r'[^a-zA-Z]', ' ', content)
    words = letters_only.lower().split()
    stems = [
        porter_stemmer.stem(word)
        for word in words
        if word not in _ENGLISH_STOPWORDS
    ]
    return ' '.join(stems)
# Turn each row's combined text into stems, embed the rows as TF-IDF vectors,
# and compute the pairwise cosine similarity between all rows.
stemmed_features = combined_features.apply(stemming)

vectorizer = TfidfVectorizer()
# Learn the vocabulary and vectorise the same corpus in a single pass.
combined_features = vectorizer.fit_transform(stemmed_features)

# similarity[i][j] is the cosine similarity between rows i and j.
similarity = cosine_similarity(combined_features)
# --- Streamlit UI ---------------------------------------------------------
# Fuzzy-match the user's query against the 'Module' column, then list the
# rows most similar to the matched row according to `similarity`.

# Upper bound on listed results; entries are numbered 1..29.
MAX_RECOMMENDATIONS = 29

st.title('Course Recommendation App')
user_input = st.text_input('Enter What You Want to Learn : ')

if user_input:
    list_of_all_titles = lpi_df['Module'].tolist()
    # difflib can only compare strings; skip non-string cells (e.g. NaN from
    # an empty CSV field) instead of letting them raise TypeError.
    candidate_titles = [
        title for title in list_of_all_titles if isinstance(title, str)
    ]
    find_close_match = difflib.get_close_matches(user_input, candidate_titles)

    if find_close_match:
        close_match = find_close_match[0]
        # `similarity` is indexed by row position, so take the position of
        # the first row carrying the matched title directly from the list.
        index_of_the_course = list_of_all_titles.index(close_match)

        # (row position, score) pairs, best score first. sorted() is stable,
        # so rows with equal scores keep their original relative order.
        sorted_similar_course = sorted(
            enumerate(similarity[index_of_the_course]),
            key=lambda pair: pair[1],
            reverse=True,
        )
        top_courses = sorted_similar_course[:MAX_RECOMMENDATIONS]

        st.write('Courses suggested for you :')
        # Only the displayed entries are visited; each title is a direct
        # positional lookup rather than a scan of the whole DataFrame.
        for rank, (row_position, _score) in enumerate(top_courses, start=1):
            st.write(f"{rank}. {list_of_all_titles[row_position]}")
        if not top_courses:
            st.write('No close matches found.')
    else:
        st.write('No close matches found.')