"""Streamlit app that answers questions about a quarterly financial PDF.

On first use the PDF is processed into two stores: a SQLite table
(``metric_table``) intended for structured metrics and a FAISS vector index
built from the PDF text. Questions containing a metric-style keyword are
answered from SQLite; everything else goes to a similarity search.
"""
import os
import sqlite3
from contextlib import closing

import fitz
import openai
import pdfplumber
import streamlit as st
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# File locations shared by the processing and the query paths.
PDF_PATH = "Q1FY24.pdf"
DB_PATH = "metric_table.db"
INDEX_DIR = "faiss_index"

# Words that route a question to the SQL table instead of the vector store.
SQL_KEYWORDS = ("trend", "margin", "growth")


def _make_embeddings():
    """Return an OpenAI embeddings client keyed from ``OPENAI_API_KEY``.

    The key is read from the environment so that no credential is ever
    committed to source control.
    """
    return OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))


# Initialize once
@st.cache_resource
def init_system():
    """Process the PDF and open the two data stores used by the UI.

    Returns:
        A ``(vector_store, conn)`` tuple: the FAISS index loaded from
        ``INDEX_DIR`` and an open SQLite connection to ``DB_PATH``.
    """
    # 1. Process PDF (creates the table and writes the FAISS index to disk).
    process_pdf(PDF_PATH)

    # 2. Load pre-processed data.
    # NOTE(review): FAISS.load_local unpickles the stored docstore; the index
    # here is the one process_pdf just wrote, so it is locally produced.
    vector_store = FAISS.load_local(INDEX_DIR, _make_embeddings())

    # 3. Connect SQL. The cached connection is handed to whichever thread
    # runs the Streamlit script, so the same-thread check must be disabled;
    # the UI only reads from it.
    conn = sqlite3.connect(DB_PATH, check_same_thread=False)
    return vector_store, conn


def process_pdf(pdf_path):
    """Build the SQLite table and the FAISS index for ``pdf_path``.

    Args:
        pdf_path: Path of the PDF to process.

    Raises:
        ValueError: If no text can be extracted from the PDF, since an
            empty index cannot be built.
    """
    # Structured data: the connection is committed and closed even on error.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.execute(
            "CREATE TABLE IF NOT EXISTS metric_table "
            "(metric TEXT, quarter TEXT, value REAL)"
        )
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # extract_text() may yield None for pages without a text layer.
                page_text = page.extract_text() or ""
                if "Financial Performance Summary" in page_text:
                    tables = page.extract_tables()
                    # TODO: insert the rows of `tables` into metric_table.
                    # The row-mapping logic is not present in this file.
        conn.commit()

    # Unstructured data: gather the plain text of every page.
    with fitz.open(pdf_path) as doc:
        full_text = "\n".join(page.get_text() for page in doc)

    if not full_text.strip():
        raise ValueError(f"No extractable text found in {pdf_path!r}")

    # Save vector store
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
    chunks = splitter.split_text(full_text)
    FAISS.from_texts(chunks, _make_embeddings()).save_local(INDEX_DIR)


# Streamlit UI
def main():
    """Render the page and answer the question typed by the user."""
    st.title("Fundrev Financial Analyzer")

    # Initialize system
    vector_store, conn = init_system()

    query = st.text_input("Ask financial question:")
    if not query:
        return

    # Hybrid query logic: keyword questions hit SQL, the rest hit the index.
    lowered = query.lower()
    if any(keyword in lowered for keyword in SQL_KEYWORDS):
        # Bound parameter instead of string interpolation: the text comes
        # straight from the user and must not be spliced into the SQL.
        with closing(conn.cursor()) as cursor:
            cursor.execute(
                "SELECT * FROM metric_table WHERE metric LIKE ?",
                (f"%{query}%",),
            )
            st.table(cursor.fetchall())
    else:
        docs = vector_store.similarity_search(query)
        if docs:
            st.write(docs[0].page_content)
        else:
            # The search can come back empty; avoid an IndexError.
            st.info("No matching passages found.")


if __name__ == "__main__":
    main()