Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,32 +1,33 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
-
import os
|
4 |
from transformers import T5ForConditionalGeneration, T5Tokenizer
|
5 |
-
import
|
6 |
|
7 |
-
#
|
8 |
-
groq_client = groq.Client(api_key="financialstatements")
|
9 |
-
|
10 |
-
# Load RAG components
|
11 |
-
retriever_tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
|
12 |
-
retriever_model = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
|
13 |
generator_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
|
14 |
generator_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
# Function to process user input and generate financial statements
|
17 |
def generate_financial_statements(file, file_type):
|
18 |
-
# Read the file
|
19 |
if file_type == "csv":
|
20 |
df = pd.read_csv(file)
|
|
|
21 |
elif file_type == "excel":
|
22 |
df = pd.read_excel(file)
|
|
|
|
|
|
|
23 |
else:
|
24 |
-
st.error("Unsupported file type. Please upload a CSV or
|
25 |
return
|
26 |
|
27 |
-
# Convert the data into a context string
|
28 |
-
context = df.to_string()
|
29 |
-
|
30 |
# Define financial statement queries
|
31 |
queries = [
|
32 |
"Generate a journal from the following financial data:",
|
@@ -36,19 +37,15 @@ def generate_financial_statements(file, file_type):
|
|
36 |
"Generate a cash flow statement from the following financial data:"
|
37 |
]
|
38 |
|
39 |
-
# Generate financial statements using
|
40 |
financial_statements = {}
|
41 |
for query in queries:
|
42 |
# Combine query and context
|
43 |
input_text = f"{query}\n{context}"
|
44 |
|
45 |
-
# Retrieve relevant information (optional, if using a retriever)
|
46 |
-
input_ids = retriever_tokenizer(input_text, return_tensors="pt").input_ids
|
47 |
-
retrieved_context = retriever_model(input_ids)
|
48 |
-
|
49 |
# Generate response using the generator model
|
50 |
-
input_ids = generator_tokenizer(input_text, return_tensors="pt").input_ids
|
51 |
-
output = generator_model.generate(input_ids)
|
52 |
response = generator_tokenizer.decode(output[0], skip_special_tokens=True)
|
53 |
|
54 |
# Store the result
|
@@ -58,10 +55,10 @@ def generate_financial_statements(file, file_type):
|
|
58 |
|
59 |
# Streamlit UI
|
60 |
st.title("Financial Statement Generator")
|
61 |
-
st.write("Upload your financial data (CSV or
|
62 |
|
63 |
# File upload
|
64 |
-
uploaded_file = st.file_uploader("Upload your file", type=["csv", "xlsx"])
|
65 |
if uploaded_file is not None:
|
66 |
file_type = uploaded_file.name.split(".")[-1]
|
67 |
financial_statements = generate_financial_statements(uploaded_file, file_type)
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
3 |
from transformers import T5ForConditionalGeneration, T5Tokenizer
|
4 |
+
from docx import Document
|
5 |
|
6 |
+
# Load the generator model (FLAN-T5)
|
|
|
|
|
|
|
|
|
|
|
7 |
generator_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
|
8 |
generator_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
|
9 |
|
10 |
+
# Function to read DOCX (Word) files
|
11 |
+
def read_docs(file):
    """Return the paragraph text of a Word document as a single string.

    Args:
        file: The object handed straight to ``Document``. In this file the
            caller forwards the uploaded file it received.

    Returns:
        The ``text`` of every entry in ``doc.paragraphs``, in the order they
        are yielded, joined with newline characters. Only ``paragraphs`` is
        read; nothing else from the document is included.
    """
    document = Document(file)
    paragraph_texts = []
    for para in document.paragraphs:
        paragraph_texts.append(para.text)
    return "\n".join(paragraph_texts)
|
15 |
+
|
16 |
# Function to process user input and generate financial statements
|
17 |
def generate_financial_statements(file, file_type):
|
18 |
+
# Read the file based on its type
|
19 |
if file_type == "csv":
|
20 |
df = pd.read_csv(file)
|
21 |
+
context = df.to_string()
|
22 |
elif file_type == "excel":
|
23 |
df = pd.read_excel(file)
|
24 |
+
context = df.to_string()
|
25 |
+
elif file_type == "docs":
|
26 |
+
context = read_docs(file)
|
27 |
else:
|
28 |
+
st.error("Unsupported file type. Please upload a CSV, Excel, or DOCS file.")
|
29 |
return
|
30 |
|
|
|
|
|
|
|
31 |
# Define financial statement queries
|
32 |
queries = [
|
33 |
"Generate a journal from the following financial data:",
|
|
|
37 |
"Generate a cash flow statement from the following financial data:"
|
38 |
]
|
39 |
|
40 |
+
# Generate financial statements using the generator model
|
41 |
financial_statements = {}
|
42 |
for query in queries:
|
43 |
# Combine query and context
|
44 |
input_text = f"{query}\n{context}"
|
45 |
|
|
|
|
|
|
|
|
|
46 |
# Generate response using the generator model
|
47 |
+
input_ids = generator_tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).input_ids
|
48 |
+
output = generator_model.generate(input_ids, max_length=512)
|
49 |
response = generator_tokenizer.decode(output[0], skip_special_tokens=True)
|
50 |
|
51 |
# Store the result
|
|
|
55 |
|
56 |
# Streamlit UI
|
57 |
st.title("Financial Statement Generator")
|
58 |
+
st.write("Upload your financial data (CSV, Excel, or DOCS) to generate journal, general ledger, income statement, balance sheet, and cash flow statement.")
|
59 |
|
60 |
# File upload
|
61 |
+
uploaded_file = st.file_uploader("Upload your file", type=["csv", "xlsx", "docx"])
|
62 |
if uploaded_file is not None:
|
63 |
file_type = uploaded_file.name.split(".")[-1]
|
64 |
financial_statements = generate_financial_statements(uploaded_file, file_type)
|