import openai
import os
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from dotenv import load_dotenv
# Set Streamlit page configuration (must be the first Streamlit call in the script)
st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", layout="wide")
# Load environment variables from a local .env file, if present
load_dotenv()
# OpenAI API key: read from the environment ONLY.
# SECURITY: never hard-code a key as a fallback here — a key committed to source
# control is leaked and must be revoked. Fail fast with a clear message instead.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    st.error("OPENAI_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()
openai.api_key = OPENAI_API_KEY
# Template for instruction-only prompts
def generate_openai_response(instruction, context=None):
    """Send *instruction* (optionally followed by *context*) to GPT-3.5 Turbo.

    Returns the assistant's reply text, or an "Error: ..." string if the
    API call fails for any reason.
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction},
    ]
    if context:
        # Retrieved notes are passed as an extra user turn after the question.
        chat_messages.append({"role": "user", "content": f"Context: {context}"})
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Use GPT-3.5 Turbo for cost savings
            messages=chat_messages,
            max_tokens=1200,
            temperature=0.7,
        )
    except Exception as e:
        # Surface the failure to the UI as text rather than crashing the app.
        return f"Error: {str(e)}"
    return completion["choices"][0]["message"]["content"]
# Extracting text from .txt files
def get_text_files_content(folder):
    """Concatenate the contents of every ``.txt`` file in *folder*.

    Files are read in sorted filename order so the result is deterministic
    (``os.listdir`` order is platform-dependent). Each file's text is
    followed by a single newline separator.

    Args:
        folder: Path to a directory containing ``.txt`` files.

    Returns:
        The combined text as one string ("" when no ``.txt`` files exist).
    """
    parts = []
    for filename in sorted(os.listdir(folder)):
        if filename.endswith('.txt'):
            with open(os.path.join(folder, filename), 'r', encoding='utf-8') as file:
                parts.append(file.read() + "\n")
    # "".join avoids the quadratic cost of repeated string concatenation.
    return "".join(parts)
# Converting text to chunks
def get_chunks(raw_text):
    """Split *raw_text* into overlapping chunks suitable for embedding.

    Chunks break on newlines, target ~1000 characters each, and overlap by
    200 characters so context is preserved across chunk boundaries.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,     # modest chunk size keeps processing fast
        chunk_overlap=200,   # small overlap balances context vs. efficiency
        length_function=len,
    )
    return splitter.split_text(raw_text)
# Using OpenAI embeddings model and FAISS to create vectorstore
def get_vectorstore(chunks):
    """Embed *chunks* with OpenAI embeddings and index them in a FAISS store."""
    return FAISS.from_texts(texts=chunks, embedding=OpenAIEmbeddings())
# Generating response from user queries
def handle_question(question, vectorstore=None):
    """Answer *question*, grounding it in notes from *vectorstore* when available.

    With a vectorstore: retrieve the two most similar chunks, join them, cap
    the combined context at 1000 characters, and prompt the model with it.
    Without one: fall back to an instruction-only prompt.
    """
    if not vectorstore:
        # No indexed notes — let the model answer on its own.
        return generate_openai_response(question)
    top_docs = vectorstore.similarity_search(question, k=2)
    joined = "\n".join(doc.page_content for doc in top_docs)
    # Cap context length to keep the prompt small and responses fast.
    return generate_openai_response(question, joined[:1000])
def main():
    """Streamlit entry point: pick a subject, index its notes, answer questions.

    Flow per rerun: select content type and subject in the sidebar, load the
    matching .txt notes, build a FAISS vectorstore from them (stored in
    st.session_state), then answer the user's question against that store.
    """
    st.title("Chat with Notes :books:")
    # Initialize session state so the chat section can check for an index
    # even before any content has been loaded.
    if "vectorstore" not in st.session_state:
        st.session_state.vectorstore = None
    # Content locations: Current Affairs uses one sub-folder per subject;
    # Essays is a single flat folder of .txt files.
    data_folder = "data"    # Current Affairs folders
    essay_folder = "essays" # Essays folder
    # Content type selection
    content_type = st.sidebar.radio("Select Content Type:", ["Current Affairs", "Essays"])
    # Build the subject list for the chosen content type (empty if the
    # folder is missing, so the selectbox degrades gracefully).
    if content_type == "Current Affairs":
        if os.path.exists(data_folder):
            subjects = [f for f in os.listdir(data_folder) if os.path.isdir(os.path.join(data_folder, f))]
        else:
            subjects = []
    # Handle Essays (all essays are in a single folder; subject = filename stem)
    elif content_type == "Essays":
        if os.path.exists(essay_folder):
            subjects = [f.replace(".txt", "") for f in os.listdir(essay_folder) if f.endswith('.txt')]
        else:
            subjects = []
    # Subject selection
    selected_subject = st.sidebar.selectbox("Select a Subject:", subjects)
    # Load the raw note text for the selected subject.
    raw_text = ""
    if content_type == "Current Affairs" and selected_subject:
        subject_folder = os.path.join(data_folder, selected_subject)
        raw_text = get_text_files_content(subject_folder)
    elif content_type == "Essays" and selected_subject:
        subject_file = os.path.join(essay_folder, selected_subject + ".txt")
        if os.path.exists(subject_file):
            with open(subject_file, "r", encoding="utf-8") as file:
                raw_text = file.read()
    # Display preview of notes and (re)build the vectorstore.
    # NOTE(review): this rebuilds chunks + embeddings on EVERY Streamlit rerun
    # (including each question submission) — consider caching per subject.
    if raw_text:
        st.subheader("Preview of Notes")
        st.text_area("Preview Content:", value=raw_text[:2000], height=300, disabled=True)  # Show a snippet of the notes
        # Create vectorstore for Current Affairs or Essays
        text_chunks = get_chunks(raw_text)
        vectorstore = get_vectorstore(text_chunks)
        st.session_state.vectorstore = vectorstore
    else:
        st.warning("No content available for the selected subject.")
    # Chat interface: only answers once a vectorstore exists in session state.
    st.subheader("Ask Your Question")
    question = st.text_input("Ask a question about your selected subject:")
    if question:
        if st.session_state.vectorstore:
            response = handle_question(question, st.session_state.vectorstore)
            st.subheader("Answer:")
            st.write(response)
        else:
            st.warning("Please load the content for the selected subject before asking a question.")
if __name__ == '__main__':
    main()