rolwinpinto committed on
Commit
0fb0810
·
verified ·
1 Parent(s): 4c78f11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -28
app.py CHANGED
@@ -5,16 +5,23 @@ import matplotlib.pyplot as plt
5
  from io import BytesIO
6
  from llama_index import VectorStoreIndex, SimpleDirectoryReader
7
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
8
- from llama_index.llms.huggingface import HuggingFaceLLM
9
  import dotenv
10
  import re
 
11
 
12
  # Load environment variables
13
  dotenv.load_dotenv()
14
 
15
- # Configure Hugging Face models
 
 
 
 
 
 
 
 
16
  embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
17
- llm_model = HuggingFaceLLM(model_name="sarvamai/sarvam-2b-v0.5", api_token=os.getenv("HUGGINGFACE_API_KEY"))
18
 
19
  def write_to_file(content, filename="./files/test.pdf"):
20
  os.makedirs(os.path.dirname(filename), exist_ok=True)
@@ -22,9 +29,6 @@ def write_to_file(content, filename="./files/test.pdf"):
22
  f.write(content)
23
 
24
  def extract_financial_data(document_text):
25
- """
26
- Extracts financial data such as revenue and dates from the document text.
27
- """
28
  financial_data = {
29
  "Revenue": [],
30
  "Date": []
@@ -35,7 +39,7 @@ def extract_financial_data(document_text):
35
 
36
  for i, line in enumerate(lines):
37
  if any(keyword in line.lower() for keyword in ["revenue", "total revenue", "sales"]):
38
- for j in range(i + 1, i + 6): # Look ahead a few lines for numbers
39
  matches = revenue_pattern.findall(lines[j])
40
  if matches:
41
  for match in matches:
@@ -64,26 +68,28 @@ def load_data(documents):
64
  return index
65
 
66
  def generate_summary(index, document_text, query):
67
- query_engine = index.as_query_engine(llm_model=llm_model)
68
- response = query_engine.query(f"""
69
- You are a financial analyst. Your task is to provide a comprehensive analysis of the financial document.
70
- Analyze the following document and respond to the query:
71
- {document_text}
72
-
73
- Query: {query}
74
-
75
- If the query is too general, respond with:
76
- Please cover the following aspects:
77
- 1. Revenue and profit trends
78
- 2. Key financial metrics
79
- 3. Major financial events and decisions
80
- 4. Comparison with previous periods
81
- 5. Future outlook or forecasts
82
- 6. Any notable financial risks or opportunities
83
-
84
- Provide a clear, concise, and professional response.
85
- """)
86
- return response.response
 
 
87
 
88
  def generate_comparison_graph(data):
89
  if not data["Date"] or not data["Revenue"]:
@@ -145,4 +151,3 @@ def main():
145
 
146
  if __name__ == "__main__":
147
  main()
148
-
 
5
  from io import BytesIO
6
  from llama_index import VectorStoreIndex, SimpleDirectoryReader
7
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 
8
  import dotenv
9
  import re
10
+ import requests
11
 
12
  # Load environment variables
13
  dotenv.load_dotenv()
14
 
15
+ # Configure Hugging Face API
16
+ API_URL = "https://api-inference.huggingface.co/models/sarvamai/sarvam-2b-v0.5"
17
+ headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
18
+
19
def query_huggingface_api(payload, timeout=60):
    """Post ``payload`` to the Hugging Face Inference API and return the decoded body.

    Args:
        payload: JSON-serializable request body; sent to ``API_URL`` with the
            module-level ``headers``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response. When the body is not valid
        JSON, a dict of the form ``{"error": <raw response text>}`` is
        returned instead so callers always receive a decoded structure.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    # An explicit timeout keeps a stalled endpoint from blocking the app forever.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    try:
        return response.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page); surface it as an error payload.
        return {"error": response.text}
22
+
23
+ # Configure embedding model
24
  embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
 
25
 
26
  def write_to_file(content, filename="./files/test.pdf"):
27
  os.makedirs(os.path.dirname(filename), exist_ok=True)
 
29
  f.write(content)
30
 
31
  def extract_financial_data(document_text):
 
 
 
32
  financial_data = {
33
  "Revenue": [],
34
  "Date": []
 
39
 
40
  for i, line in enumerate(lines):
41
  if any(keyword in line.lower() for keyword in ["revenue", "total revenue", "sales"]):
42
+ for j in range(i + 1, i + 6):
43
  matches = revenue_pattern.findall(lines[j])
44
  if matches:
45
  for match in matches:
 
68
  return index
69
 
70
  def generate_summary(index, document_text, query):
71
+ query_engine = index.as_query_engine()
72
+ llm_response = query_huggingface_api({
73
+ "inputs": f"""
74
+ You are a financial analyst. Your task is to provide a comprehensive analysis of the financial document.
75
+ Analyze the following document and respond to the query:
76
+ {document_text}
77
+
78
+ Query: {query}
79
+
80
+ If the query is too general, respond with:
81
+ Please cover the following aspects:
82
+ 1. Revenue and profit trends
83
+ 2. Key financial metrics
84
+ 3. Major financial events and decisions
85
+ 4. Comparison with previous periods
86
+ 5. Future outlook or forecasts
87
+ 6. Any notable financial risks or opportunities
88
+
89
+ Provide a clear, concise, and professional response.
90
+ """
91
+ })
92
+ return llm_response.get("generated_text", "No response from model.")
93
 
94
  def generate_comparison_graph(data):
95
  if not data["Date"] or not data["Revenue"]:
 
151
 
152
  if __name__ == "__main__":
153
  main()