# Source: Hugging Face Space (page status when captured: Sleeping).
import os
from io import BytesIO

import matplotlib.pyplot as plt
import PyPDF2
import requests
import streamlit as st
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
# Configure the Hugging Face models used for embedding and text generation.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# HuggingFaceLLM loads the checkpoint locally through transformers. It has no
# `api_token` argument; the Hub token is forwarded to `from_pretrained` via
# `model_kwargs` / `tokenizer_kwargs`. `tokenizer_name` is set explicitly
# because it otherwise falls back to the library's default model, which does
# not match this checkpoint.
# NOTE(review): confirm against the installed llama-index / transformers
# versions (older transformers releases expect `use_auth_token` instead).
_HF_MODEL_NAME = "sarvamai/sarvam-2b-v0.5"
_HF_TOKEN = os.getenv("HUGGINGFACE_API_KEY")
Settings.llm = HuggingFaceLLM(
    model_name=_HF_MODEL_NAME,
    tokenizer_name=_HF_MODEL_NAME,
    model_kwargs={"token": _HF_TOKEN},
    tokenizer_kwargs={"token": _HF_TOKEN},
)
def write_to_file(content: bytes, filename: str = "./files/test.pdf") -> None:
    """Write raw bytes to ``filename``, creating its parent directory if needed.

    Args:
        content: The bytes to store; the file is opened in binary mode.
        filename: Destination path. An existing file is overwritten.
    """
    parent_dir = os.path.dirname(filename)
    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)
def ingest_documents(directory="./files/"):
    """Load every document found in ``directory`` with SimpleDirectoryReader.

    Args:
        directory: Folder to read from. Defaults to ``"./files/"``, the
            folder ``write_to_file`` writes to by default.

    Returns:
        Whatever ``SimpleDirectoryReader.load_data()`` returns for that folder.
    """
    return SimpleDirectoryReader(directory).load_data()
def load_data(documents):
    """Build a vector store index over ``documents``.

    Args:
        documents: Documents to index, e.g. the result of
            ``ingest_documents()``.

    Returns:
        The index created by ``VectorStoreIndex.from_documents``.
    """
    return VectorStoreIndex.from_documents(documents)
def generate_summary(index, document_text):
    """Ask the index's query engine for a financial summary of a document.

    Args:
        index: Object exposing ``as_query_engine()``; the returned engine's
            ``query(prompt)`` result must have a ``response`` attribute.
        document_text: Plain text of the document, embedded verbatim in the
            prompt.

    Returns:
        The ``response`` attribute of the query result.
    """
    # NOTE(review): the full document text is sent inside the query itself,
    # on top of whatever context the query engine retrieves. Long documents
    # may exceed the model's context window -- confirm this is intended.
    prompt = (
        "\n"
        "You are a financial analyst. Your task is to provide a comprehensive"
        " summary of the given financial document.\n"
        "Analyze the following document and summarize it:\n"
        f"{document_text}\n"
        "Please cover the following aspects:\n"
        "1. Revenue and profit trends\n"
        "2. Key financial metrics\n"
        "3. Major financial events and decisions\n"
        "4. Comparison with previous periods\n"
        "5. Future outlook or forecasts\n"
        "6. Any notable financial risks or opportunities\n"
        "Provide a clear, concise, and professional summary\n"
    )
    query_engine = index.as_query_engine()
    response = query_engine.query(prompt)
    return response.response
def generate_comparison_graph(data):
    """Render a revenue-over-time line chart in the Streamlit page.

    Args:
        data: Mapping with a ``"Date"`` entry (x values) and a ``"Revenue"``
            entry (y values); both are passed straight to ``ax.plot``.
    """
    fig, ax = plt.subplots()
    ax.plot(data["Date"], data["Revenue"], marker="o")
    ax.set_title("Revenue Comparison")
    ax.set_xlabel("Date")
    ax.set_ylabel("Revenue (in millions)")
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; without
    # this, each Streamlit rerun would leave another open figure behind.
    plt.close(fig)
# Streamlit app
def main():
    """Run the Streamlit page: upload a PDF, summarize it, and plot revenue.

    Nothing beyond the title and uploader is rendered until a file has been
    uploaded. The upload is then saved with ``write_to_file``, indexed, and
    summarized, and a revenue chart built from placeholder data is shown.
    """
    st.title("Financial Document Summarizer")
    st.write("Upload a financial document, and let our AI summarize it!")

    uploaded_file = st.file_uploader("Choose a financial document file", type=["pdf"])
    if uploaded_file is None:
        return

    file_bytes = uploaded_file.getvalue()
    if uploaded_file.type == "application/pdf":
        pdf_reader = PyPDF2.PdfReader(BytesIO(file_bytes))
        # `or ""` keeps the join safe should a page yield no text value.
        document_text = "".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )
    else:
        document_text = file_bytes.decode("utf-8")

    # Persist the upload so ingest_documents() can read it back from disk.
    write_to_file(file_bytes)

    st.write("Analyzing financial document...")
    documents = ingest_documents()
    index = load_data(documents)
    summary = generate_summary(index, document_text)

    st.write("## Financial Document Summary")
    st.write(summary)

    # Example data for graph (replace with actual data extraction logic)
    data = {
        "Date": ["Q1 2017", "Q2 2017", "Q1 2018", "Q2 2018"],
        "Revenue": [500, 550, 600, 620],
    }
    st.write("## Revenue Comparison")
    generate_comparison_graph(data)
# Start the app only when this file is executed as the entry script.
if __name__ == "__main__":
    main()