Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,16 +5,23 @@ import matplotlib.pyplot as plt
|
|
5 |
from io import BytesIO
|
6 |
from llama_index import VectorStoreIndex, SimpleDirectoryReader
|
7 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
8 |
-
from llama_index.llms.huggingface import HuggingFaceLLM
|
9 |
import dotenv
|
10 |
import re
|
|
|
11 |
|
12 |
# Load environment variables
|
13 |
dotenv.load_dotenv()
|
14 |
|
15 |
-
# Configure Hugging Face
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
17 |
-
llm_model = HuggingFaceLLM(model_name="sarvamai/sarvam-2b-v0.5", api_token=os.getenv("HUGGINGFACE_API_KEY"))
|
18 |
|
19 |
def write_to_file(content, filename="./files/test.pdf"):
|
20 |
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
@@ -22,9 +29,6 @@ def write_to_file(content, filename="./files/test.pdf"):
|
|
22 |
f.write(content)
|
23 |
|
24 |
def extract_financial_data(document_text):
|
25 |
-
"""
|
26 |
-
Extracts financial data such as revenue and dates from the document text.
|
27 |
-
"""
|
28 |
financial_data = {
|
29 |
"Revenue": [],
|
30 |
"Date": []
|
@@ -35,7 +39,7 @@ def extract_financial_data(document_text):
|
|
35 |
|
36 |
for i, line in enumerate(lines):
|
37 |
if any(keyword in line.lower() for keyword in ["revenue", "total revenue", "sales"]):
|
38 |
-
for j in range(i + 1, i + 6):
|
39 |
matches = revenue_pattern.findall(lines[j])
|
40 |
if matches:
|
41 |
for match in matches:
|
@@ -64,26 +68,28 @@ def load_data(documents):
|
|
64 |
return index
|
65 |
|
66 |
def generate_summary(index, document_text, query):
|
67 |
-
query_engine = index.as_query_engine(
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
87 |
|
88 |
def generate_comparison_graph(data):
|
89 |
if not data["Date"] or not data["Revenue"]:
|
@@ -145,4 +151,3 @@ def main():
|
|
145 |
|
146 |
if __name__ == "__main__":
|
147 |
main()
|
148 |
-
|
|
|
5 |
from io import BytesIO
|
6 |
from llama_index import VectorStoreIndex, SimpleDirectoryReader
|
7 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
|
|
8 |
import dotenv
|
9 |
import re
|
10 |
+
import requests
|
11 |
|
12 |
# Load environment variables
|
13 |
dotenv.load_dotenv()
|
14 |
|
15 |
+
# Configure Hugging Face API
|
16 |
+
API_URL = "https://api-inference.huggingface.co/models/sarvamai/sarvam-2b-v0.5"
|
17 |
+
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
|
18 |
+
|
19 |
+
def query_huggingface_api(payload, timeout=120):
    """Send ``payload`` to the hosted model endpoint and return the decoded JSON.

    Args:
        payload: JSON-serialisable request body posted to ``API_URL``.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout the call can block forever.

    Returns:
        The decoded JSON body. If the body is not valid JSON (for example an
        HTML gateway error page), a dict of the form ``{"error": <raw text>}``
        is returned so callers always receive a JSON-like object.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    try:
        return response.json()
    except ValueError:
        # requests' JSON decode errors subclass ValueError; surface the raw
        # body instead of crashing on a non-JSON reply.
        return {"error": response.text}
|
22 |
+
|
23 |
+
# Configure embedding model
|
24 |
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
|
|
25 |
|
26 |
def write_to_file(content, filename="./files/test.pdf"):
|
27 |
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
|
|
29 |
f.write(content)
|
30 |
|
31 |
def extract_financial_data(document_text):
|
|
|
|
|
|
|
32 |
financial_data = {
|
33 |
"Revenue": [],
|
34 |
"Date": []
|
|
|
39 |
|
40 |
for i, line in enumerate(lines):
|
41 |
if any(keyword in line.lower() for keyword in ["revenue", "total revenue", "sales"]):
|
42 |
+
for j in range(i + 1, i + 6):
|
43 |
matches = revenue_pattern.findall(lines[j])
|
44 |
if matches:
|
45 |
for match in matches:
|
|
|
68 |
return index
|
69 |
|
70 |
def generate_summary(index, document_text, query):
    """Ask the hosted LLM for a financial analysis of ``document_text``.

    Args:
        index: Vector index for the document. Not used by this
            implementation; kept so existing callers keep working.
        document_text: Full text of the document, embedded in the prompt.
        query: The user's question about the document.

    Returns:
        The model's generated text, or ``"No response from model."`` when the
        reply contains no ``generated_text`` field (for example an error
        payload or an empty list).
    """
    llm_response = query_huggingface_api({
        "inputs": f"""
        You are a financial analyst. Your task is to provide a comprehensive analysis of the financial document.
        Analyze the following document and respond to the query:
        {document_text}

        Query: {query}

        If the query is too general, respond with:
        Please cover the following aspects:
        1. Revenue and profit trends
        2. Key financial metrics
        3. Major financial events and decisions
        4. Comparison with previous periods
        5. Future outlook or forecasts
        6. Any notable financial risks or opportunities

        Provide a clear, concise, and professional response.
        """
    })

    # The text-generation endpoint is documented as returning a list of
    # {"generated_text": ...} objects, while errors come back as a plain dict.
    # Normalise both shapes before looking up the field.
    if isinstance(llm_response, list):
        llm_response = llm_response[0] if llm_response else {}
    if not isinstance(llm_response, dict):
        return "No response from model."
    return llm_response.get("generated_text", "No response from model.")
|
93 |
|
94 |
def generate_comparison_graph(data):
|
95 |
if not data["Date"] or not data["Revenue"]:
|
|
|
151 |
|
152 |
if __name__ == "__main__":
|
153 |
main()
|
|