File size: 5,499 Bytes
160fbe1
5102822
49bb688
160fbe1
 
 
5102822
 
ecf2148
 
 
5102822
 
 
160fbe1
 
 
5102822
3455401
160fbe1
 
 
 
 
 
 
 
 
67c01ec
160fbe1
 
 
 
 
 
 
5102822
3455401
5102822
 
 
 
 
 
 
 
3455401
5102822
 
 
3455401
 
 
5102822
160fbe1
5102822
160fbe1
5102822
160fbe1
3455401
 
5102822
3455401
5102822
 
160fbe1
3455401
 
160fbe1
 
 
 
 
5102822
 
0d30433
5102822
3455401
5102822
 
 
3455401
 
 
51fcb96
14e71a0
5991ba4
5102822
3455401
5991ba4
3455401
 
 
 
 
 
 
 
 
 
51fcb96
14e71a0
5102822
 
3455401
0d30433
5991ba4
 
 
 
3455401
5991ba4
 
 
0d30433
3455401
0d30433
 
3455401
0401909
3455401
 
 
 
5991ba4
 
 
3455401
0d30433
5102822
 
 
 
0d30433
160fbe1
5102822
 
 
3455401
5102822
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import openai
import os
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv

# Set Streamlit page configuration
st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", layout="wide")

# Load environment variables
load_dotenv()

# OpenAI API key — must be supplied via the environment (.env file or shell).
# SECURITY FIX: the previous code shipped a literal API key as the getenv()
# fallback, committing a live secret to source control. That key must be
# revoked; never hard-code credentials.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    st.error("OPENAI_API_KEY is not set. Add it to your .env file or environment.")
openai.api_key = OPENAI_API_KEY

# Template for instruction-only prompts
def generate_openai_response(instruction, context=None):
    """Ask the chat model to answer *instruction*, optionally grounded in *context*.

    Any failure is converted into an ``"Error: ..."`` string rather than raised,
    so the Streamlit UI can render it directly.
    """
    try:
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": instruction},
        ]
        if context:
            # Supply retrieved notes as an additional user turn.
            conversation.append({"role": "user", "content": f"Context: {context}"})
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # GPT-3.5 Turbo keeps cost down
            messages=conversation,
            max_tokens=1200,
            temperature=0.7,
        )
        return completion["choices"][0]["message"]["content"]
    except Exception as err:
        return f"Error: {str(err)}"

# Extracting text from .txt files
def get_text_files_content(folder):
    """Concatenate the contents of every ``.txt`` file directly inside *folder*.

    Each file's text is followed by a single newline. Files are read in sorted
    filename order — ``os.listdir`` order is filesystem-dependent, so the
    original implementation produced non-deterministic output. Building the
    result with ``"".join`` also avoids quadratic string concatenation.
    """
    parts = []
    for filename in sorted(os.listdir(folder)):
        if filename.endswith('.txt'):
            with open(os.path.join(folder, filename), 'r', encoding='utf-8') as file:
                parts.append(file.read() + "\n")
    return "".join(parts)

# Converting text to chunks
def get_chunks(raw_text):
    """Split *raw_text* into overlapping chunks suitable for embedding."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,    # Reduced chunk size for faster processing
        chunk_overlap=200,  # Smaller overlap for efficiency
        length_function=len,
    )
    return splitter.split_text(raw_text)

# Using OpenAI embeddings model and FAISS to create vectorstore
def get_vectorstore(chunks):
    """Embed *chunks* with OpenAI embeddings and index them in a FAISS store."""
    return FAISS.from_texts(texts=chunks, embedding=OpenAIEmbeddings())

# Generating response from user queries
def handle_question(question, vectorstore=None):
    """Answer *question*, grounding it in *vectorstore* content when available."""
    if not vectorstore:
        # No notes loaded — fall back to an instruction-only prompt.
        return generate_openai_response(question)
    # Retrieve the two most similar chunks and cap the combined context size
    # for faster processing.
    docs = vectorstore.similarity_search(question, k=2)
    context = "\n".join(doc.page_content for doc in docs)[:1000]
    return generate_openai_response(question, context)

def main():
    """Streamlit entry point: choose notes, build a vectorstore, answer questions."""
    st.title("Chat with Notes :books:")

    # Initialize session state
    if "vectorstore" not in st.session_state:
        st.session_state.vectorstore = None

    # Folders holding the notes
    data_folder = "data"     # Current Affairs: one sub-folder per subject
    essay_folder = "essays"  # Essays: one .txt file per essay

    # Content type selection
    content_type = st.sidebar.radio("Select Content Type:", ["Current Affairs", "Essays"])

    # Build the subject list for the chosen content type. Initialized up front
    # so `subjects` is always bound even if neither branch matches.
    subjects = []
    if content_type == "Current Affairs":
        # Each subject has its own sub-folder under data_folder.
        if os.path.exists(data_folder):
            subjects = [f for f in os.listdir(data_folder) if os.path.isdir(os.path.join(data_folder, f))]
    elif content_type == "Essays":
        # All essays live as .txt files in a single folder.
        if os.path.exists(essay_folder):
            subjects = [f.replace(".txt", "") for f in os.listdir(essay_folder) if f.endswith('.txt')]

    # Subject selection
    selected_subject = st.sidebar.selectbox("Select a Subject:", subjects)

    # Load the raw text for the selected subject
    raw_text = ""
    if content_type == "Current Affairs" and selected_subject:
        raw_text = get_text_files_content(os.path.join(data_folder, selected_subject))
    elif content_type == "Essays" and selected_subject:
        subject_file = os.path.join(essay_folder, selected_subject + ".txt")
        if os.path.exists(subject_file):
            with open(subject_file, "r", encoding="utf-8") as file:
                raw_text = file.read()

    # Display preview of notes and (re)build the vectorstore
    if raw_text:
        st.subheader("Preview of Notes")
        st.text_area("Preview Content:", value=raw_text[:2000], height=300, disabled=True)  # Show a snippet of the notes
        st.session_state.vectorstore = get_vectorstore(get_chunks(raw_text))
    else:
        # BUGFIX: clear any stale vectorstore from a previously loaded subject;
        # otherwise questions would be answered against content that is no
        # longer selected.
        st.session_state.vectorstore = None
        st.warning("No content available for the selected subject.")

    # Chat interface
    st.subheader("Ask Your Question")
    question = st.text_input("Ask a question about your selected subject:")
    if question:
        if st.session_state.vectorstore:
            st.subheader("Answer:")
            st.write(handle_question(question, st.session_state.vectorstore))
        else:
            st.warning("Please load the content for the selected subject before asking a question.")

if __name__ == '__main__':
    main()