# NOTE(review): the lines above this file's code were web-scrape residue from a
# Hugging Face Spaces page ("Spaces:" / "Runtime error") — converted to comments
# so the module parses; the reported runtime error likely stems from the missing
# OPENAI_API_KEY configuration below.
| import openai | |
| import os | |
| import streamlit as st | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import OpenAIEmbeddings | |
| from dotenv import load_dotenv | |
# --- Application setup (module-level side effects) ---

# Streamlit page configuration must be the first Streamlit call in the script.
st.set_page_config(page_title="Chat with Notes and AI", page_icon=":books:", layout="wide")

# Load variables from a local .env file into the process environment.
load_dotenv()

# OpenAI API key is read strictly from the environment (.env or shell).
# SECURITY FIX: the original hard-coded a real "sk-proj-..." key as the
# getenv fallback — a key committed to source control is leaked and must be
# revoked/rotated. Never ship a secret as a default value.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Surface the misconfiguration in the UI instead of failing opaquely later.
    st.error("OPENAI_API_KEY is not set. Add it to your .env file or environment.")
openai.api_key = OPENAI_API_KEY
| # Template for instruction-only prompts | |
def generate_openai_response(instruction, context=None):
    """Send *instruction* (optionally followed by *context*) to GPT-3.5 Turbo.

    Args:
        instruction: The user's question or instruction text.
        context: Optional supporting text; appended as a second user message
            prefixed with "Context: " when truthy.

    Returns:
        The assistant's reply text, or an "Error: ..." string when the API
        call raises (the UI displays it rather than crashing).
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction},
    ]
    if context:
        chat_messages.append({"role": "user", "content": f"Context: {context}"})
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # GPT-3.5 Turbo for cost savings
            messages=chat_messages,
            max_tokens=1200,
            temperature=0.7,
        )
    except Exception as e:
        return f"Error: {str(e)}"
    return completion["choices"][0]["message"]["content"]
| # Extracting text from .txt files | |
def get_text_files_content(folder):
    """Concatenate the contents of every ``.txt`` file in *folder*.

    Args:
        folder: Path to a directory containing ``.txt`` note files.

    Returns:
        The combined text, each file's content followed by a newline
        separator; "" when the folder holds no ``.txt`` files.

    Raises:
        FileNotFoundError: If *folder* does not exist.
    """
    parts = []
    # sorted() makes the concatenation order deterministic — os.listdir order
    # is filesystem-dependent, which previously made the combined text (and
    # thus the chunks/embeddings) vary between platforms and runs.
    for filename in sorted(os.listdir(folder)):
        if filename.endswith('.txt'):
            with open(os.path.join(folder, filename), 'r', encoding='utf-8') as file:
                parts.append(file.read() + "\n")
    # "".join avoids the quadratic cost of repeated string += in a loop.
    return "".join(parts)
| # Converting text to chunks | |
def get_chunks(raw_text):
    """Split *raw_text* into overlapping chunks ready for embedding.

    Uses LangChain's ``CharacterTextSplitter`` with newline separators,
    1000-character chunks, and a 200-character overlap.

    Returns:
        A list of chunk strings.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,    # modest chunk size for faster processing
        chunk_overlap=200,  # small overlap keeps neighbouring context linked
        length_function=len,
    )
    return splitter.split_text(raw_text)
| # Using OpenAI embeddings model and FAISS to create vectorstore | |
def get_vectorstore(chunks):
    """Embed *chunks* with OpenAI embeddings and index them in a FAISS store.

    Args:
        chunks: List of text chunks produced by ``get_chunks``.

    Returns:
        A FAISS vectorstore supporting similarity search over the chunks.
    """
    return FAISS.from_texts(texts=chunks, embedding=OpenAIEmbeddings())
| # Generating response from user queries | |
def handle_question(question, vectorstore=None):
    """Answer *question*, grounded in *vectorstore* context when available.

    Args:
        question: The user's question text.
        vectorstore: Optional FAISS store of note chunks; when falsy, the
            question is answered without any retrieved context.

    Returns:
        The model's answer string (or an "Error: ..." string on API failure).
    """
    if not vectorstore:
        # No indexed notes yet — fall back to an instruction-only prompt.
        return generate_openai_response(question)
    # Retrieve the two most similar chunks, then cap the combined context
    # at 1000 characters to keep the prompt small.
    matches = vectorstore.similarity_search(question, k=2)
    combined = "\n".join(doc.page_content for doc in matches)
    return generate_openai_response(question, combined[:1000])
def main():
    """Streamlit entry point: choose notes, build a vectorstore, and chat.

    Flow: the sidebar selects a content type and subject -> the matching
    .txt content is loaded -> it is chunked and embedded into a FAISS
    vectorstore kept in session state -> questions are answered against it.
    """
    st.title("Chat with Notes :books:")
    # Session state survives Streamlit reruns; keep the vectorstore there so
    # a typed question can be answered after the rerun it triggers.
    if "vectorstore" not in st.session_state:
        st.session_state.vectorstore = None
    # Source folders, relative to the working directory.
    data_folder = "data"  # Current Affairs: one sub-folder per subject
    essay_folder = "essays"  # Essays: a flat folder of .txt files
    # Content type selection
    content_type = st.sidebar.radio("Select Content Type:", ["Current Affairs", "Essays"])
    # Current Affairs: every sub-directory of data/ is a selectable subject.
    if content_type == "Current Affairs":
        if os.path.exists(data_folder):
            subjects = [f for f in os.listdir(data_folder) if os.path.isdir(os.path.join(data_folder, f))]
        else:
            subjects = []
    # Essays: every .txt file in essays/ is a subject (extension stripped).
    elif content_type == "Essays":
        if os.path.exists(essay_folder):
            subjects = [f.replace(".txt", "") for f in os.listdir(essay_folder) if f.endswith('.txt')]
        else:
            subjects = []
    # Subject selection (selectbox yields None when `subjects` is empty).
    selected_subject = st.sidebar.selectbox("Select a Subject:", subjects)
    # Load the raw text for the chosen subject.
    raw_text = ""
    if content_type == "Current Affairs" and selected_subject:
        subject_folder = os.path.join(data_folder, selected_subject)
        raw_text = get_text_files_content(subject_folder)
    elif content_type == "Essays" and selected_subject:
        subject_file = os.path.join(essay_folder, selected_subject + ".txt")
        if os.path.exists(subject_file):
            with open(subject_file, "r", encoding="utf-8") as file:
                raw_text = file.read()
    # Display preview of notes
    if raw_text:
        st.subheader("Preview of Notes")
        st.text_area("Preview Content:", value=raw_text[:2000], height=300, disabled=True)  # Show a snippet of the notes
        # Rebuild the vectorstore on every rerun so it always reflects the
        # currently selected subject (embedding calls repeat each rerun).
        text_chunks = get_chunks(raw_text)
        vectorstore = get_vectorstore(text_chunks)
        st.session_state.vectorstore = vectorstore
    else:
        st.warning("No content available for the selected subject.")
    # Chat interface
    st.subheader("Ask Your Question")
    question = st.text_input("Ask a question about your selected subject:")
    if question:
        if st.session_state.vectorstore:
            response = handle_question(question, st.session_state.vectorstore)
            st.subheader("Answer:")
            st.write(response)
        else:
            st.warning("Please load the content for the selected subject before asking a question.")


if __name__ == '__main__':
    main()