|
import streamlit as st |
|
import difflib |
|
import pandas as pd |
|
import numpy as np |
|
|
|
|
|
|
|
import re |
|
import nltk |
|
nltk.download('stopwords') |
|
from nltk.corpus import stopwords |
|
from nltk.stem.porter import PorterStemmer |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
|
|
|
# Columns whose text is joined (space-separated, in this order) into the
# per-course feature string that is later stemmed and vectorized.
_FEATURE_COLUMNS = [
    'Course_Learning_Material',
    'Source',
    'Course_Level',
    'Type',
    'Module',
    'Difficulty_Level',
    'Keywords_Tags_Skills_Interests_Categories',
]

lpi_df = pd.read_csv('Learning_Pathway_Index.csv')

# Plain `+` concatenation turns the whole row into NaN as soon as one cell is
# missing (and raises on non-string columns), which then breaks the regex step
# in `stemming`. Fill missing cells with '' and coerce to str before joining;
# rows with no missing values produce exactly the same string as before.
lpi_df['combined_features'] = (
    lpi_df[_FEATURE_COLUMNS].fillna('').astype(str).agg(' '.join, axis=1)
)

combined_features = lpi_df['combined_features']

# Shared stemmer instance used by `stemming`.
porter_stemmer = PorterStemmer()
|
|
|
|
|
def stemming(content):
    """Normalize text: keep letters only, lowercase, drop stop words, stem.

    Args:
        content: Raw text to normalize.

    Returns:
        A single string of Porter-stemmed tokens separated by one space.
    """
    # Build the stop-word set once per call. The original called
    # stopwords.words('english') for every token and searched the returned
    # list linearly.
    english_stop_words = set(stopwords.words('english'))

    # Every non-letter character becomes a space so it acts as a separator.
    letters_only = re.sub(r'[^a-zA-Z]', ' ', content)
    tokens = letters_only.lower().split()

    stemmed_tokens = [
        porter_stemmer.stem(word)
        for word in tokens
        if word not in english_stop_words
    ]
    return ' '.join(stemmed_tokens)
|
|
|
# Stem every course's feature text, learn the TF-IDF vocabulary and encode the
# texts in one step, then compute the pairwise cosine similarity between all
# courses (row i holds the similarity of course i to every course).
vectorizer = TfidfVectorizer()
combined_features = vectorizer.fit_transform(combined_features.apply(stemming))
similarity = cosine_similarity(combined_features)
|
|
|
|
|
|
|
|
|
# Streamlit UI: fuzzy-match the user's query against the module titles, then
# list the courses most similar to the best-matching one.

# The original loop printed while its counter was < 30, i.e. 29 entries.
MAX_RECOMMENDATIONS = 29

st.title('Course Recommendation App')

user_input = st.text_input('Enter What You Want to Learn : ')

if user_input:
    list_of_all_titles = lpi_df['Module'].tolist()
    find_close_match = difflib.get_close_matches(user_input, list_of_all_titles)

    if find_close_match:
        close_match = find_close_match[0]

        # `similarity` is indexed by row position, so look the match up by
        # position in the title list (first occurrence) rather than by
        # DataFrame index label.
        index_of_the_course = list_of_all_titles.index(close_match)

        similarity_score = enumerate(similarity[index_of_the_course])
        sorted_similar_course = sorted(
            similarity_score, key=lambda pair: pair[1], reverse=True
        )

        st.write('Courses suggested for you :')

        # Only the top entries are displayed, so slice first instead of
        # filtering the DataFrame once for every course. The similarity row
        # always contains at least the matched course itself, so no empty
        # fallback is needed in this branch.
        top_courses = sorted_similar_course[:MAX_RECOMMENDATIONS]
        for rank, (index, _score) in enumerate(top_courses, start=1):
            st.write(f"{rank}. {list_of_all_titles[index]}")
    else:
        st.write('No close matches found.')
|
|
|
|