Spaces:

mfraz
/

Financial-Statements

Sleeping

App Files Files Community

mfraz committed on Feb 21

Commit

1896b1a

verified ·

1 Parent(s): 00ef1a7

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -62

app.py CHANGED Viewed

@@ -1,72 +1,132 @@
 import streamlit as st
-import pandas as pd
-from transformers import T5ForConditionalGeneration, T5Tokenizer
 from docx import Document
-# Load the generator model (FLAN-T5)
-generator_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
-generator_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
-# Function to read DOCS files
-def read_docs(file):
     doc = Document(file)
-    text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
     return text
-# Function to process user input and generate financial statements
-def generate_financial_statements(file, file_type):
-    # Read the file based on its type
-    if file_type == "csv":
-        df = pd.read_csv(file)
-        context = df.to_string()
-    elif file_type == "xlsx":
-        df = pd.read_excel(file)
-        context = df.to_string()
-    elif file_type == "docx":
-        context = read_docs(file)
-    else:
-        st.error("Unsupported file type. Please upload a CSV, Excel, or DOCS file.")
-        return None
-    # Define financial statement queries
-    queries = [
-        "Generate a journal from the following financial data:",
-        "Generate a general ledger from the following financial data:",
-        "Generate an income statement from the following financial data:",
-        "Generate a balance sheet from the following financial data:",
-        "Generate a cash flow statement from the following financial data:"
-    ]
-    # Generate financial statements using the generator model
-    financial_statements = {}
-    for query in queries:
-        # Combine query and context
-        input_text = f"{query}\n{context}"
-        # Generate response using the generator model
-        input_ids = generator_tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).input_ids
-        output = generator_model.generate(input_ids, max_length=512)
-        response = generator_tokenizer.decode(output[0], skip_special_tokens=True)
-        # Store the result
-        financial_statements[query] = response
-    return financial_statements
-# Streamlit UI
-st.title("Financial Statement Generator")
-st.write("Upload your financial data (CSV, Excel, or DOCS) to generate journal, general ledger, income statement, balance sheet, and cash flow statement.")
-# File upload
-uploaded_file = st.file_uploader("Upload your file", type=["csv", "xlsx", "docx"])
 if uploaded_file is not None:
-    file_type = uploaded_file.name.split(".")[-1].lower()  # Ensure lowercase file type
-    financial_statements = generate_financial_statements(uploaded_file, file_type)
-    # Display results if financial_statements is not None
-    if financial_statements is not None:
-        for statement_type, statement in financial_statements.items():
-            st.subheader(statement_type)
-            st.write(statement)
     else:
-        st.error("Failed to generate financial statements. Please check the file type and content.")

+# Import necessary libraries
 import streamlit as st
+from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
+import PyPDF2
 from docx import Document
+import pandas as pd
+import os
+# Page header rendered above the upload widget
+st.title("Financial Statement Generator")
+st.write("Upload a PDF or DOCX file to generate financial statements.")
+# Upload widget limited to the two supported extensions; the script later
+# checks the result against None before doing any work
+uploaded_file = st.file_uploader(
+    label="Upload a PDF or DOCX file",
+    type=["pdf", "docx"],
+)
+# Function to extract text from PDF
+def extract_text_from_pdf(file):
+    """Return the concatenated text of every page in a PDF.
+
+    Args:
+        file: The uploaded PDF, passed unchanged to ``PyPDF2.PdfReader``.
+
+    Returns:
+        The extracted text of all pages joined in page order, with no
+        separator added between pages.
+    """
+    # PdfFileReader, getNumPages() and getPage() were removed in PyPDF2 3.0
+    # and now raise DeprecationError; PdfReader and .pages replace them.
+    pdf_reader = PyPDF2.PdfReader(file)
+    # Defensive "or ''": keeps the join safe should a page yield no text
+    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
+# Function to extract text from DOCX
+def extract_text_from_docx(file):
+    """Return the text of every paragraph in a DOCX file, one per line.
+
+    Args:
+        file: The uploaded document, passed unchanged to ``Document``.
+
+    Returns:
+        Each paragraph's text followed by a newline, concatenated in
+        document order (an empty string when there are no paragraphs).
+    """
     doc = Document(file)
+    # Every paragraph, including the last one, is terminated by a newline
+    text = "".join(f"{paragraph.text}\n" for paragraph in doc.paragraphs)
     return text
+# Function to process extracted text using RAG model
+def generate_financial_statements(text):
+    """Generate text from the extracted document text with the RAG model.
+
+    Args:
+        text: Raw text extracted from the uploaded document. The tokenizer
+            is called with ``max_length=512`` and ``truncation=True``, so
+            longer input is cut off.
+
+    Returns:
+        The decoded first generated sequence, or an empty string when
+        ``text`` is empty or contains only whitespace.
+    """
+    # Nothing to condition on: skip the expensive model load and generation
+    if not text or not text.strip():
+        return ""
+    tokenizer, model = _load_rag_components()
+    # Tokenize input text
+    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
+    # Generate financial statements
+    outputs = model.generate(input_ids=inputs["input_ids"], max_length=1000)
+    # Decode generated text
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+
+@st.cache_resource
+def _load_rag_components():
+    """Load the RAG tokenizer and model once and reuse them across reruns.
+
+    Streamlit re-executes the whole script on every interaction, so loading
+    inside generate_financial_statements() repeated the full model load on
+    each rerun. ``st.cache_resource`` (Streamlit >= 1.18) keeps one shared
+    instance instead.
+
+    Returns:
+        A ``(tokenizer, model)`` tuple.
+    """
+    tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-base")
+    # NOTE(review): index_name="exact" without use_dummy_dataset=True appears
+    # to fetch the full retrieval index, which is very large - confirm this
+    # is intended for the deployment target.
+    retriever = RagRetriever.from_pretrained("facebook/rag-sequence-base", index_name="exact")
+    model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-base", retriever=retriever)
+    return tokenizer, model
+# Function to parse generated text into financial statements
+def parse_financial_statements(generated_text):
+    """Sort generated lines into financial-statement buckets by keyword.
+
+    Placeholder routing: each line goes to the first bucket whose marker it
+    contains, and lines without any marker are dropped. "Journal General"
+    has no markers yet, so it is always returned empty.
+
+    Args:
+        generated_text: Model output; it is split on newline characters.
+
+    Returns:
+        Dict mapping each statement name to the list of matching lines.
+    """
+    # Rules are checked top to bottom; the first one that matches wins
+    routing_rules = (
+        ("Ledger", ("Transaction:",)),
+        ("Income Statement", ("Revenue:", "Expense:")),
+        ("Balance Sheet", ("Asset:", "Liability:", "Equity:")),
+        ("Cash Flow Statement", ("Cash Inflow:", "Cash Outflow:")),
+    )
+    bucket_names = (
+        "Ledger",
+        "Journal General",
+        "Income Statement",
+        "Balance Sheet",
+        "Cash Flow Statement",
+    )
+    statements = {name: [] for name in bucket_names}
+    for entry in generated_text.split("\n"):
+        for bucket, markers in routing_rules:
+            if any(marker in entry for marker in markers):
+                statements[bucket].append(entry)
+                break
+    return statements
+# Main logic
 if uploaded_file is not None:
+    # Pick the extractor from the MIME type reported for the upload
+    mime_type = uploaded_file.type
+    if mime_type == "application/pdf":
+        text = extract_text_from_pdf(uploaded_file)
+    elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+        text = extract_text_from_docx(uploaded_file)
     else:
+        # Anything else is rejected and the script run ends here
+        st.error("Unsupported file format. Please upload a PDF or DOCX file.")
+        st.stop()
+    # Show the raw extraction so the user can check what was read
+    st.subheader("Extracted Text")
+    st.write(text)
+    # Run the model, then bucket its output by statement type
+    st.subheader("Generated Financial Statements")
+    generated_text = generate_financial_statements(text)
+    statements = parse_financial_statements(generated_text)
+    # One section per statement, with a fallback message for empty buckets
+    for statement_type, rows in statements.items():
+        st.write(f"### {statement_type}")
+        st.write(rows if rows else "No data available for this statement.")
+    # One CSV download button per non-empty statement
+    for statement_type, rows in statements.items():
+        if not rows:
+            continue
+        csv_text = pd.DataFrame(rows, columns=[statement_type]).to_csv(index=False)
+        export_stem = statement_type.lower().replace(' ', '_')
+        st.download_button(
+            label=f"Download {statement_type} as CSV",
+            data=csv_text,
+            file_name=f"{export_stem}.csv",
+            mime="text/csv",
+        )
+# Dependencies (the RAG model additionally needs PyTorch, and RagRetriever
+# needs the datasets and faiss packages, so they are listed explicitly)
+st.sidebar.subheader("Dependencies")
+st.sidebar.write("""
+- Streamlit
+- Hugging Face Transformers
+- PyTorch
+- datasets
+- faiss-cpu
+- PyPDF2
+- python-docx
+- pandas
+""")
+# Deployment instructions (install line kept in sync with the list above)
+st.sidebar.subheader("Deployment Instructions")
+st.sidebar.write("""
+1. Install dependencies: `pip install streamlit transformers torch datasets faiss-cpu PyPDF2 python-docx pandas`
+2. Run the app: `streamlit run app.py`
+3. Access the app in your browser at `http://localhost:8501`
+""")