Spaces:
Sleeping
Sleeping
File size: 3,148 Bytes
86b7caa ea26600 86b7caa ea26600 86b7caa ea26600 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import streamlit as st
import PyPDF2
import matplotlib.pyplot as plt
from io import BytesIO
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
import requests
# Configure the Hugging Face models that llama_index uses globally:
# one model for embeddings and one as the LLM.
_EMBEDDING_MODEL_NAME = "BAAI/bge-small-en-v1.5"
_LLM_MODEL_NAME = "sarvamai/sarvam-2b-v0.5"

Settings.embed_model = HuggingFaceEmbedding(model_name=_EMBEDDING_MODEL_NAME)
# The API token is read from the HUGGINGFACE_API_KEY environment variable
# (None when the variable is unset).
# NOTE(review): confirm that the installed HuggingFaceLLM accepts an
# `api_token` keyword argument.
Settings.llm = HuggingFaceLLM(
    model_name=_LLM_MODEL_NAME,
    api_token=os.getenv("HUGGINGFACE_API_KEY"),
)
def write_to_file(content, filename="./files/test.pdf"):
    """Write a bytes payload to ``filename``, creating parent directories.

    Args:
        content: Bytes-like data to write; the file is opened in binary mode.
        filename: Destination path. Defaults to ``./files/test.pdf``.
    """
    parent_dir = os.path.dirname(filename)
    # A bare file name has no directory component; os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)
def ingest_documents():
    """Load documents from the ``./files/`` directory.

    Returns:
        The result of ``SimpleDirectoryReader("./files/").load_data()``.
    """
    return SimpleDirectoryReader("./files/").load_data()
def load_data(documents):
    """Build a vector store index from already-loaded documents.

    Args:
        documents: Documents to index, passed straight through to
            ``VectorStoreIndex.from_documents``.

    Returns:
        The index created by ``VectorStoreIndex.from_documents``.
    """
    return VectorStoreIndex.from_documents(documents)
def generate_summary(index, document_text):
    """Query the index for a financial-analyst style summary of a document.

    The complete ``document_text`` is interpolated into the prompt that is
    sent to the query engine obtained from ``index``.

    Args:
        index: Object exposing ``as_query_engine()``; the returned engine
            must provide ``query(prompt)``.
        document_text: Text of the document to summarize.

    Returns:
        The ``response`` attribute of the query result.
    """
    engine = index.as_query_engine()
    prompt = f"""
You are a financial analyst. Your task is to provide a comprehensive summary of the given financial document.
Analyze the following document and summarize it:
{document_text}
Please cover the following aspects:
1. Revenue and profit trends
2. Key financial metrics
3. Major financial events and decisions
4. Comparison with previous periods
5. Future outlook or forecasts
6. Any notable financial risks or opportunities
Provide a clear, concise, and professional summary
"""
    answer = engine.query(prompt)
    return answer.response
def generate_comparison_graph(data):
    """Render a revenue line chart in the Streamlit page.

    Args:
        data: Mapping read via ``data["Date"]`` (x-axis values) and
            ``data["Revenue"]`` (y-axis values).
    """
    fig, ax = plt.subplots()
    try:
        ax.plot(data["Date"], data["Revenue"], marker="o")
        ax.set_title("Revenue Comparison")
        ax.set_xlabel("Date")
        ax.set_ylabel("Revenue (in millions)")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is
        # explicitly closed; without this, each call leaks one figure.
        plt.close(fig)
# Streamlit app
def main():
    """Streamlit entry point: upload a document, summarize it, plot a chart.

    Once a file is uploaded, its text is extracted (page by page for PDFs,
    UTF-8 decoding otherwise), the raw bytes are saved via ``write_to_file``,
    the saved files are indexed, and the generated summary is displayed.
    A revenue chart built from hard-coded example data is shown afterwards.
    """
    st.title("Financial Document Summarizer")
    st.write("Upload a financial document, and let our AI summarize it!")
    uploaded_file = st.file_uploader("Choose a financial document file", type=["pdf"])

    # Nothing to do until the user has provided a file.
    if uploaded_file is None:
        return

    raw_bytes = uploaded_file.getvalue()
    if uploaded_file.type != "application/pdf":
        document_text = raw_bytes.decode("utf-8")
    else:
        pages = PyPDF2.PdfReader(BytesIO(raw_bytes)).pages
        document_text = "".join(page.extract_text() for page in pages)

    # Persist the upload so the directory reader can pick it up.
    write_to_file(raw_bytes)

    st.write("Analyzing financial document...")
    index = load_data(ingest_documents())
    summary = generate_summary(index, document_text)

    st.write("## Financial Document Summary")
    st.write(summary)

    # Example data for graph (replace with actual data extraction logic)
    data = {
        "Date": ["Q1 2017", "Q2 2017", "Q1 2018", "Q2 2018"],
        "Revenue": [500, 550, 600, 620],
    }
    st.write("## Revenue Comparison")
    generate_comparison_graph(data)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|