rolwinpinto committed on
Commit
ea26600
·
verified ·
1 Parent(s): 741514a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -117
app.py CHANGED
@@ -1,124 +1,92 @@
1
  import os
2
  import streamlit as st
3
- import pypdf
4
  import matplotlib.pyplot as plt
5
  from io import BytesIO
6
  from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
7
- from llama_index.embeddings.fastembed import FastEmbedEmbedding
8
- from llama_index.llms.gemini import Gemini
9
- import json
10
- from crewai import Agent, Task, Crew, Process
11
-
12
- # Configure Google Gemini API key
13
- os.environ["GOOGLE_API_KEY"] = st.secrets["GOOGLE_API_KEY"]
14
- Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
15
- Settings.llm = Gemini(api_key=os.environ["GOOGLE_API_KEY"], temperature=0.5, model_name="models/gemini-pro")
16
-
17
- class FinAnalyst:
18
- def __init__(self):
19
- self.configure_agents()
20
-
21
- def configure_agents(self):
22
- # Configure agents with Gemini as the LLM
23
- self.document_processor = Agent(
24
- role='Document Processor',
25
- goal='Process and extract text from financial documents',
26
- backstory='Expert in handling various document formats and extracting relevant information',
27
- llm=Settings.llm, # Use the configured Gemini LLM
28
- allow_delegation=False
29
- )
30
-
31
- self.data_extractor = Agent(
32
- role='Data Extractor',
33
- goal='Extract key financial data from processed documents',
34
- backstory='Specialist in identifying and parsing financial information from text',
35
- llm=Settings.llm,
36
- allow_delegation=False
37
- )
38
-
39
- self.financial_analyst = Agent(
40
- role='Financial Analyst',
41
- goal='Analyze financial data and provide insightful summaries',
42
- backstory='Experienced financial expert with deep knowledge of Fortune 500 companies',
43
- llm=Settings.llm,
44
- allow_delegation=False
45
- )
46
-
47
- self.data_visualizer = Agent(
48
- role='Data Visualizer',
49
- goal='Create visual representations of financial data',
50
- backstory='Expert in data visualization techniques and financial charting',
51
- llm=Settings.llm,
52
- allow_delegation=False
53
- )
54
-
55
- def process_document(self, file_content):
56
- task = Task(
57
- description="Process the uploaded financial document and extract its text content",
58
- agent=self.document_processor
59
- )
60
- return task.execute(file_content)
61
-
62
- def extract_financial_data(self, document_text):
63
- task = Task(
64
- description="Extract key financial data from the document text. Focus on revenue figures and corresponding dates. Return the data as a JSON string with 'Revenue' and 'Date' lists.",
65
- agent=self.data_extractor
66
- )
67
- return task.execute(document_text)
68
-
69
- def analyze_financials(self, financial_data, query):
70
- task = Task(
71
- description=f"Analyze the financial data and answer the query: {query}. Provide a comprehensive analysis covering revenue trends, key metrics, major events, period comparisons, future outlook, and potential risks/opportunities.",
72
- agent=self.financial_analyst
73
- )
74
- return task.execute(financial_data)
75
-
76
- def visualize_data(self, financial_data):
77
- task = Task(
78
- description="Create a revenue comparison graph based on the financial data. Return the plot as a base64 encoded string.",
79
- agent=self.data_visualizer
80
- )
81
- return task.execute(financial_data)
82
-
83
- def run(self):
84
- st.title("FinAnalyst: Fortune 500 Financial Document Analyzer")
85
- st.write("Upload a financial document, ask questions, and get detailed analysis!")
86
-
87
- uploaded_file = st.file_uploader("Choose a financial document file", type=["pdf"])
88
-
89
- if uploaded_file is not None:
90
- file_content = uploaded_file.getvalue()
91
-
92
- st.write("Analyzing financial document...")
93
-
94
- document_text = self.process_document(file_content)
95
- financial_data = self.extract_financial_data(document_text)
96
-
97
- # Parse the JSON string to a Python dictionary
98
- financial_dict = json.loads(financial_data)
99
-
100
- query = st.text_input("Enter your financial analysis query (e.g., 'What are the revenue trends?')", "")
101
-
102
- if query:
103
- analysis = self.analyze_financials(financial_data, query)
104
- st.write("## Financial Analysis Result")
105
- st.write(analysis)
106
-
107
- st.write("## Revenue Comparison")
108
- if financial_dict["Revenue"] and financial_dict["Date"]:
109
- fig, ax = plt.subplots(figsize=(10, 6))
110
- ax.plot(financial_dict["Date"], financial_dict["Revenue"], marker="o", linestyle="-", color="b", label="Revenue")
111
- ax.set_title("Revenue Comparison")
112
- ax.set_xlabel("Date")
113
- ax.set_ylabel("Revenue (in millions)")
114
- ax.grid(True)
115
- ax.legend()
116
- plt.xticks(rotation=45, ha="right")
117
- plt.tight_layout()
118
- st.pyplot(fig)
119
- else:
120
- st.write("No revenue data found for comparison.")
121
 
122
  if __name__ == "__main__":
123
- fin_analyst = FinAnalyst()
124
- fin_analyst.run()
 
1
  import os
2
  import streamlit as st
3
+ import PyPDF2
4
  import matplotlib.pyplot as plt
5
  from io import BytesIO
6
  from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
7
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
8
+ from llama_index.llms.huggingface import HuggingFaceLLM
9
+ import requests
10
+
11
# Configure Hugging Face models used by LlamaIndex for embedding and generation.
_HF_LLM_NAME = "sarvamai/sarvam-2b-v0.5"
_HF_TOKEN = os.getenv("HUGGINGFACE_API_KEY")
# HuggingFaceLLM has no `api_token` argument; credentials are forwarded to the
# underlying `from_pretrained` calls through the kwargs dicts instead.
# NOTE(review): assumes a transformers version that accepts `token=` — confirm.
_HF_AUTH_KWARGS = {"token": _HF_TOKEN} if _HF_TOKEN else {}

Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.llm = HuggingFaceLLM(
    model_name=_HF_LLM_NAME,
    # Without this the tokenizer falls back to the library's default model
    # and would not match the weights loaded above.
    tokenizer_name=_HF_LLM_NAME,
    model_kwargs=dict(_HF_AUTH_KWARGS),
    tokenizer_kwargs=dict(_HF_AUTH_KWARGS),
)
14
+
15
def write_to_file(content, filename="./files/test.pdf"):
    """Write raw bytes to ``filename``, creating its parent directory if needed.

    Args:
        content: Bytes to store; the file is opened in binary write mode and
            any existing file at ``filename`` is overwritten.
        filename: Destination path. Defaults to ``./files/test.pdf``.
    """
    parent_dir = os.path.dirname(filename)
    # A bare file name has an empty directory part, and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when one is named.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)
19
+
20
def ingest_documents():
    """Load the documents stored under ``./files/``.

    Returns:
        The result of ``SimpleDirectoryReader("./files/").load_data()``.
    """
    return SimpleDirectoryReader("./files/").load_data()
24
+
25
def load_data(documents):
    """Build a ``VectorStoreIndex`` from already-loaded documents.

    Args:
        documents: Documents to index, as passed to
            ``VectorStoreIndex.from_documents``.

    Returns:
        The newly built index.
    """
    return VectorStoreIndex.from_documents(documents)
28
+
29
def generate_summary(index, document_text):
    """Ask the index's query engine for a financial summary of a document.

    Args:
        index: Object exposing ``as_query_engine()``; the returned engine's
            ``query(prompt)`` result must carry a ``response`` attribute.
        document_text: Extracted text of the document, embedded verbatim in
            the prompt. ``None`` or empty text is sent as a blank section.

    Returns:
        The text of the engine's response, or ``""`` when the engine
        produced no response text.
    """
    # Assemble the prompt line by line so its whitespace never depends on how
    # this function happens to be indented in the source file.
    prompt = "\n".join(
        [
            "You are a financial analyst. Your task is to provide a comprehensive summary of the given financial document.",
            "Analyze the following document and summarize it:",
            document_text or "",
            "",
            "Please cover the following aspects:",
            "1. Revenue and profit trends",
            "2. Key financial metrics",
            "3. Major financial events and decisions",
            "4. Comparison with previous periods",
            "5. Future outlook or forecasts",
            "6. Any notable financial risks or opportunities",
            "",
            "Provide a clear, concise, and professional summary",
        ]
    )
    response = index.as_query_engine().query(prompt)
    # The response text may be missing; hand callers a string either way.
    return response.response or ""
47
+
48
def generate_comparison_graph(data):
    """Render a revenue line chart in the Streamlit page.

    Args:
        data: Mapping with a ``"Date"`` entry (x values) and a ``"Revenue"``
            entry (y values).
    """
    fig, ax = plt.subplots()
    try:
        ax.plot(data["Date"], data["Revenue"], marker="o")
        ax.set_title("Revenue Comparison")
        ax.set_xlabel("Date")
        ax.set_ylabel("Revenue (in millions)")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # release this one so repeated script reruns do not pile figures up.
        plt.close(fig)
55
+
56
def _extract_document_text(uploaded_file):
    """Return the text content of an uploaded file (PDF, else UTF-8 text)."""
    raw_bytes = uploaded_file.getvalue()
    if uploaded_file.type != "application/pdf":
        return raw_bytes.decode("utf-8")
    pdf_reader = PyPDF2.PdfReader(BytesIO(raw_bytes))
    # A page without a text layer may yield nothing (possibly None, depending
    # on the PyPDF2 version); `or ""` keeps the join from failing on it.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


# Streamlit app
def main():
    """Run the Streamlit page: upload a PDF, summarize it, and plot revenue.

    Once a file is uploaded, its text is extracted, the raw bytes are saved
    via ``write_to_file`` so ``ingest_documents`` can load them, an index is
    built with ``load_data``, and the summary from ``generate_summary`` is
    displayed followed by a revenue chart.
    """
    st.title("Financial Document Summarizer")
    st.write("Upload a financial document, and let our AI summarize it!")

    uploaded_file = st.file_uploader("Choose a financial document file", type=["pdf"])
    if uploaded_file is None:
        return

    document_text = _extract_document_text(uploaded_file)

    # Persist the upload at the default path so the directory reader sees it.
    write_to_file(uploaded_file.getvalue())

    st.write("Analyzing financial document...")

    documents = ingest_documents()
    index = load_data(documents)
    summary = generate_summary(index, document_text)

    st.write("## Financial Document Summary")
    st.write(summary)

    # Example data for graph (replace with actual data extraction logic)
    data = {
        "Date": ["Q1 2017", "Q2 2017", "Q1 2018", "Q2 2018"],
        "Revenue": [500, 550, 600, 620],
    }
    st.write("## Revenue Comparison")
    generate_comparison_graph(data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  if __name__ == "__main__":
92
+ main()