Spaces:

rolwinpinto
/

finanalyst

Sleeping

App Files Community

rolwinpinto committed on Aug 14, 2024

Commit

ea26600

verified ·

1 Parent(s): 741514a

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -117

app.py CHANGED Viewed

@@ -1,124 +1,92 @@
 import os
 import streamlit as st
-import pypdf
 import matplotlib.pyplot as plt
 from io import BytesIO
 from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
-from llama_index.embeddings.fastembed import FastEmbedEmbedding
-from llama_index.llms.gemini import Gemini
-import json
-from crewai import Agent, Task, Crew, Process
-# Configure Google Gemini API key
-os.environ["GOOGLE_API_KEY"] = st.secrets["GOOGLE_API_KEY"]
-Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
-Settings.llm = Gemini(api_key=os.environ["GOOGLE_API_KEY"], temperature=0.5, model_name="models/gemini-pro")
-class FinAnalyst:
-    def __init__(self):
-        self.configure_agents()
-    def configure_agents(self):
-        # Configure agents with Gemini as the LLM
-        self.document_processor = Agent(
-            role='Document Processor',
-            goal='Process and extract text from financial documents',
-            backstory='Expert in handling various document formats and extracting relevant information',
-            llm=Settings.llm,  # Use the configured Gemini LLM
-            allow_delegation=False
-        )
-        self.data_extractor = Agent(
-            role='Data Extractor',
-            goal='Extract key financial data from processed documents',
-            backstory='Specialist in identifying and parsing financial information from text',
-            llm=Settings.llm,
-            allow_delegation=False
-        )
-        self.financial_analyst = Agent(
-            role='Financial Analyst',
-            goal='Analyze financial data and provide insightful summaries',
-            backstory='Experienced financial expert with deep knowledge of Fortune 500 companies',
-            llm=Settings.llm,
-            allow_delegation=False
-        )
-        self.data_visualizer = Agent(
-            role='Data Visualizer',
-            goal='Create visual representations of financial data',
-            backstory='Expert in data visualization techniques and financial charting',
-            llm=Settings.llm,
-            allow_delegation=False
-        )
-    def process_document(self, file_content):
-        task = Task(
-            description="Process the uploaded financial document and extract its text content",
-            agent=self.document_processor
-        )
-        return task.execute(file_content)
-    def extract_financial_data(self, document_text):
-        task = Task(
-            description="Extract key financial data from the document text. Focus on revenue figures and corresponding dates. Return the data as a JSON string with 'Revenue' and 'Date' lists.",
-            agent=self.data_extractor
-        )
-        return task.execute(document_text)
-    def analyze_financials(self, financial_data, query):
-        task = Task(
-            description=f"Analyze the financial data and answer the query: {query}. Provide a comprehensive analysis covering revenue trends, key metrics, major events, period comparisons, future outlook, and potential risks/opportunities.",
-            agent=self.financial_analyst
-        )
-        return task.execute(financial_data)
-    def visualize_data(self, financial_data):
-        task = Task(
-            description="Create a revenue comparison graph based on the financial data. Return the plot as a base64 encoded string.",
-            agent=self.data_visualizer
-        )
-        return task.execute(financial_data)
-    def run(self):
-        st.title("FinAnalyst: Fortune 500 Financial Document Analyzer")
-        st.write("Upload a financial document, ask questions, and get detailed analysis!")
-        uploaded_file = st.file_uploader("Choose a financial document file", type=["pdf"])
-        if uploaded_file is not None:
-            file_content = uploaded_file.getvalue()
-            st.write("Analyzing financial document...")
-            document_text = self.process_document(file_content)
-            financial_data = self.extract_financial_data(document_text)
-            # Parse the JSON string to a Python dictionary
-            financial_dict = json.loads(financial_data)
-            query = st.text_input("Enter your financial analysis query (e.g., 'What are the revenue trends?')", "")
-            if query:
-                analysis = self.analyze_financials(financial_data, query)
-                st.write("## Financial Analysis Result")
-                st.write(analysis)
-            st.write("## Revenue Comparison")
-            if financial_dict["Revenue"] and financial_dict["Date"]:
-                fig, ax = plt.subplots(figsize=(10, 6))
-                ax.plot(financial_dict["Date"], financial_dict["Revenue"], marker="o", linestyle="-", color="b", label="Revenue")
-                ax.set_title("Revenue Comparison")
-                ax.set_xlabel("Date")
-                ax.set_ylabel("Revenue (in millions)")
-                ax.grid(True)
-                ax.legend()
-                plt.xticks(rotation=45, ha="right")
-                plt.tight_layout()
-                st.pyplot(fig)
-            else:
-                st.write("No revenue data found for comparison.")
 if __name__ == "__main__":
-    fin_analyst = FinAnalyst()
-    fin_analyst.run()

 import os
 import streamlit as st
+import PyPDF2
 import matplotlib.pyplot as plt
 from io import BytesIO
 from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.llms.huggingface import HuggingFaceLLM
+import requests
+# Configure Hugging Face model
+Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+Settings.llm = HuggingFaceLLM(model_name="sarvamai/sarvam-2b-v0.5", api_token=os.getenv("HUGGINGFACE_API_KEY"))
+def write_to_file(content, filename="./files/test.pdf"):
+    os.makedirs(os.path.dirname(filename), exist_ok=True)
+    with open(filename, "wb") as f:
+        f.write(content)
+def ingest_documents():
+    reader = SimpleDirectoryReader("./files/")
+    documents = reader.load_data()
+    return documents
+def load_data(documents):
+    index = VectorStoreIndex.from_documents(documents)
+    return index
+def generate_summary(index, document_text):
+    query_engine = index.as_query_engine()
+    response = query_engine.query(f"""
+    You are a financial analyst. Your task is to provide a comprehensive summary of the given financial document.
+    Analyze the following document and summarize it:
+    {document_text}
+    Please cover the following aspects:
+    1. Revenue and profit trends
+    2. Key financial metrics
+    3. Major financial events and decisions
+    4. Comparison with previous periods
+    5. Future outlook or forecasts
+    6. Any notable financial risks or opportunities
+    Provide a clear, concise, and professional summary
+    """)
+    return response.response
+def generate_comparison_graph(data):
+    fig, ax = plt.subplots()
+    ax.plot(data["Date"], data["Revenue"], marker="o")
+    ax.set_title("Revenue Comparison")
+    ax.set_xlabel("Date")
+    ax.set_ylabel("Revenue (in millions)")
+    st.pyplot(fig)
+# Streamlit app
+def main():
+    st.title("Financial Document Summarizer")
+    st.write("Upload a financial document, and let our AI summarize it!")
+    uploaded_file = st.file_uploader("Choose a financial document file", type=["pdf"])
+    if uploaded_file is not None:
+        if uploaded_file.type == "application/pdf":
+            pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file.getvalue()))
+            document_text = ""
+            for page in pdf_reader.pages:
+                document_text += page.extract_text()
+        else:
+            document_text = uploaded_file.getvalue().decode("utf-8")
+        write_to_file(uploaded_file.getvalue())
+        st.write("Analyzing financial document...")
+        documents = ingest_documents()
+        index = load_data(documents)
+        summary = generate_summary(index, document_text)
+        st.write("## Financial Document Summary")
+        st.write(summary)
+        # Example data for graph (replace with actual data extraction logic)
+        data = {
+            "Date": ["Q1 2017", "Q2 2017", "Q1 2018", "Q2 2018"],
+            "Revenue": [500, 550, 600, 620]
+        }
+        st.write("## Revenue Comparison")
+        generate_comparison_graph(data)
 if __name__ == "__main__":
+    main()