mfraz committed on
Commit
2320a8a
·
verified ·
1 Parent(s): 3371665

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -22
app.py CHANGED
@@ -1,32 +1,33 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import os
4
  from transformers import T5ForConditionalGeneration, T5Tokenizer
5
- import groq
6
 
7
- # Initialize Groq API
8
- groq_client = groq.Client(api_key="financialstatements")
9
-
10
- # Load RAG components
11
- retriever_tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
12
- retriever_model = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
13
  generator_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
14
  generator_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
15
 
 
 
 
 
 
 
16
  # Function to process user input and generate financial statements
17
  def generate_financial_statements(file, file_type):
18
- # Read the file
19
  if file_type == "csv":
20
  df = pd.read_csv(file)
 
21
  elif file_type == "excel":
22
  df = pd.read_excel(file)
 
 
 
23
  else:
24
- st.error("Unsupported file type. Please upload a CSV or Excel file.")
25
  return
26
 
27
- # Convert the data into a context string
28
- context = df.to_string()
29
-
30
  # Define financial statement queries
31
  queries = [
32
  "Generate a journal from the following financial data:",
@@ -36,19 +37,15 @@ def generate_financial_statements(file, file_type):
36
  "Generate a cash flow statement from the following financial data:"
37
  ]
38
 
39
- # Generate financial statements using RAG
40
  financial_statements = {}
41
  for query in queries:
42
  # Combine query and context
43
  input_text = f"{query}\n{context}"
44
 
45
- # Retrieve relevant information (optional, if using a retriever)
46
- input_ids = retriever_tokenizer(input_text, return_tensors="pt").input_ids
47
- retrieved_context = retriever_model(input_ids)
48
-
49
  # Generate response using the generator model
50
- input_ids = generator_tokenizer(input_text, return_tensors="pt").input_ids
51
- output = generator_model.generate(input_ids)
52
  response = generator_tokenizer.decode(output[0], skip_special_tokens=True)
53
 
54
  # Store the result
@@ -58,10 +55,10 @@ def generate_financial_statements(file, file_type):
58
 
59
  # Streamlit UI
60
  st.title("Financial Statement Generator")
61
- st.write("Upload your financial data (CSV or Excel) to generate journal, general ledger, income statement, balance sheet, and cash flow statement.")
62
 
63
  # File upload
64
- uploaded_file = st.file_uploader("Upload your file", type=["csv", "xlsx"])
65
  if uploaded_file is not None:
66
  file_type = uploaded_file.name.split(".")[-1]
67
  financial_statements = generate_financial_statements(uploaded_file, file_type)
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  from transformers import T5ForConditionalGeneration, T5Tokenizer
4
+ from docx import Document
5
 
6
+ # Load the generator model (FLAN-T5)
 
 
 
 
 
7
  generator_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
8
  generator_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
9
 
10
+ # Function to read DOCS files
11
+ def read_docs(file):
12
+ doc = Document(file)
13
+ text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
14
+ return text
15
+
16
  # Function to process user input and generate financial statements
17
  def generate_financial_statements(file, file_type):
18
+ # Read the file based on its type
19
  if file_type == "csv":
20
  df = pd.read_csv(file)
21
+ context = df.to_string()
22
  elif file_type == "excel":
23
  df = pd.read_excel(file)
24
+ context = df.to_string()
25
+ elif file_type == "docs":
26
+ context = read_docs(file)
27
  else:
28
+ st.error("Unsupported file type. Please upload a CSV, Excel, or DOCS file.")
29
  return
30
 
 
 
 
31
  # Define financial statement queries
32
  queries = [
33
  "Generate a journal from the following financial data:",
 
37
  "Generate a cash flow statement from the following financial data:"
38
  ]
39
 
40
+ # Generate financial statements using the generator model
41
  financial_statements = {}
42
  for query in queries:
43
  # Combine query and context
44
  input_text = f"{query}\n{context}"
45
 
 
 
 
 
46
  # Generate response using the generator model
47
+ input_ids = generator_tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).input_ids
48
+ output = generator_model.generate(input_ids, max_length=512)
49
  response = generator_tokenizer.decode(output[0], skip_special_tokens=True)
50
 
51
  # Store the result
 
55
 
56
  # Streamlit UI
57
  st.title("Financial Statement Generator")
58
+ st.write("Upload your financial data (CSV, Excel, or DOCS) to generate journal, general ledger, income statement, balance sheet, and cash flow statement.")
59
 
60
  # File upload
61
+ uploaded_file = st.file_uploader("Upload your file", type=["csv", "xlsx", "docx"])
62
  if uploaded_file is not None:
63
  file_type = uploaded_file.name.split(".")[-1]
64
  financial_statements = generate_financial_statements(uploaded_file, file_type)