File size: 2,341 Bytes
59fb831
 
438c423
27a6856
 
 
ae73ba1
14f9e42
27a6856
 
 
 
59fb831
 
 
91bec1a
438c423
27a6856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59fb831
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import streamlit as st
import difflib
import pandas as pd
import numpy as np



# for text data preprocessing
import re
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the course catalogue and build one free-text field per course; the
# recommender further down compares courses using only this field.
lpi_df = pd.read_csv('Learning_Pathway_Index.csv')

# Columns whose text is concatenated (in this order) into 'combined_features'.
FEATURE_COLUMNS = [
    'Course_Learning_Material',
    'Source',
    'Course_Level',
    'Type',
    'Module',
    'Difficulty_Level',
    'Keywords_Tags_Skills_Interests_Categories',
]

# Blank out missing cells and coerce everything to str before joining:
# with plain `+` concatenation a single NaN cell turns the whole combined
# value into NaN, which then fails in the regex-based text cleaning.
_feature_text = lpi_df[FEATURE_COLUMNS].fillna('').astype(str)
lpi_df['combined_features'] = _feature_text[FEATURE_COLUMNS[0]].str.cat(
    _feature_text[FEATURE_COLUMNS[1:]], sep=' '
)

combined_features = lpi_df['combined_features']

porter_stemmer = PorterStemmer()


# Anything that is not an ASCII letter is treated as a word separator.
_NON_LETTERS = re.compile(r'[^a-zA-Z]')

# Holds the English stop-word set after its first use so the corpus is not
# re-read for every word (or every document) within a run of the script.
_STOPWORD_CACHE = {}


def _english_stopwords():
    """Return NLTK's English stop words as a frozenset, loading them once."""
    if 'english' not in _STOPWORD_CACHE:
        # Imported locally: the file's import block never brings `stopwords`
        # into scope, which previously made `stemming` fail with NameError.
        import nltk
        from nltk.corpus import stopwords

        try:
            words = stopwords.words('english')
        except LookupError:
            # The stop-word corpus is not installed yet; fetch it and retry.
            nltk.download('stopwords', quiet=True)
            words = stopwords.words('english')
        _STOPWORD_CACHE['english'] = frozenset(words)
    return _STOPWORD_CACHE['english']


def stemming(content):
    """Normalise a piece of text for TF-IDF vectorisation.

    The text is stripped of every non-letter character, lower-cased, split
    on whitespace, filtered of English stop words, and each remaining word
    is reduced to its Porter stem.

    Args:
        content: The text to normalise. Non-string values (for example the
            NaN that pandas uses for a missing cell) are treated as empty.

    Returns:
        The stemmed words joined by single spaces; '' when nothing remains.
    """
    if not isinstance(content, str):
        return ''

    words = _NON_LETTERS.sub(' ', content).lower().split()
    if not words:
        # Nothing to stem, so avoid loading the stop-word corpus at all.
        return ''

    stop_words = _english_stopwords()
    return ' '.join(
        porter_stemmer.stem(word) for word in words if word not in stop_words
    )

# Clean every course description with `stemming`, turn the cleaned corpus
# into a TF-IDF document-term matrix, and compute the pairwise cosine
# similarity between all rows of that matrix.
vectorizer = TfidfVectorizer()

# fit_transform learns the vocabulary/IDF weights and vectorises the same
# corpus in one call (equivalent to fit() followed by transform()).
combined_features = vectorizer.fit_transform(combined_features.apply(stemming))

similarity = cosine_similarity(combined_features)




# Streamlit front end: read a free-text query, fuzzy-match it against the
# 'Module' column, and list the courses most similar to the best match.

# Number of suggestions to display. The previous loop printed items while
# its counter was below 30, i.e. entries 1 through 29; that count is kept.
MAX_SUGGESTIONS = 29

st.title('Course Recommendation App')

user_input = st.text_input('Enter What You Want to Learn : ')

if user_input:
    # Positional list of titles: position k here corresponds to row k of
    # `similarity`, so titles are looked up by position rather than through
    # a boolean mask over the DataFrame index.
    list_of_all_titles = lpi_df['Module'].tolist()
    find_close_match = difflib.get_close_matches(user_input, list_of_all_titles)

    if find_close_match:
        close_match = find_close_match[0]
        # First row whose title equals the best fuzzy match.
        index_of_the_course = list_of_all_titles.index(close_match)

        # (row position, score) pairs ordered from most to least similar;
        # sorted() is stable, so ties keep their original row order.
        similarity_score = enumerate(similarity[index_of_the_course])
        sorted_similar_course = sorted(
            similarity_score, key=lambda pair: pair[1], reverse=True
        )

        st.write('Courses suggested for you :')

        # Only the displayed slice is visited; the rest of the catalogue is
        # no longer iterated after the limit has been reached.
        top_courses = sorted_similar_course[:MAX_SUGGESTIONS]
        for rank, (index, _score) in enumerate(top_courses, start=1):
            st.write(f"{rank}. {list_of_all_titles[index]}")
    else:
        st.write('No close matches found.')