Rafa1986 committed on
Commit
0c80efa
·
verified ·
1 Parent(s): 0302345

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -61
app.py CHANGED
@@ -50,65 +50,8 @@ def extract_files_from_folder(folder_path):
50
  print("Files found:", extracted_files) # Debugging log
51
  return extracted_files
52
 
53
- def read_text_from_files(file_paths):
54
- """Reads text content from a list of files."""
55
- text = ""
56
- for file_path in file_paths:
57
- print(f"Reading text file: {file_path}") # Debugging log
58
- with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
59
- file_text = file.read()
60
- text += file_text + "\n"
61
- print("Extracted text from TXT files:", text[:500]) # Debugging log (First 500 chars)
62
- return text
63
-
64
- def get_text_from_pdf(pdf_files):
65
- text = ""
66
- for pdf_path in pdf_files:
67
- print(f"Reading PDF file: {pdf_path}") # Debugging log
68
- with open(pdf_path, "rb") as pdf_file:
69
- reader = PyPDF2.PdfReader(pdf_file)
70
- for page in reader.pages:
71
- page_text = page.extract_text()
72
- if page_text:
73
- text += page_text + "\n"
74
- print("Extracted text from PDF files:", text[:500]) # Debugging log (First 500 chars)
75
- return text
76
-
77
- def get_text_from_csv(csv_files):
78
- text = ""
79
- for csv_path in csv_files:
80
- print(f"Reading CSV file: {csv_path}") # Debugging log
81
- df = pd.read_csv(csv_path)
82
- text += df.to_string() + "\n"
83
- print("Extracted text from CSV files:", text[:500]) # Debugging log (First 500 chars)
84
- return text
85
-
86
- def get_text_from_docx(docx_files):
87
- text = ""
88
- for docx_path in docx_files:
89
- print(f"Reading DOCX file: {docx_path}") # Debugging log
90
- doc = Document(docx_path)
91
- for para in doc.paragraphs:
92
- text += para.text + "\n"
93
- print("Extracted text from DOCX files:", text[:500]) # Debugging log (First 500 chars)
94
- return text
95
-
96
- def get_text_from_ipynb(ipynb_files):
97
- text = ""
98
- for ipynb_path in ipynb_files:
99
- print(f"Reading IPYNB file: {ipynb_path}") # Debugging log
100
- with open(ipynb_path, "r", encoding="utf-8") as file:
101
- notebook = json.load(file)
102
- for cell in notebook.get("cells", []):
103
- if cell.get("cell_type") == "markdown":
104
- text += "\n".join(cell.get("source", [])) + "\n"
105
- print("Extracted text from IPYNB files:", text[:500]) # Debugging log (First 500 chars)
106
- return text
107
-
108
- def chatbot_interface(question):
109
- folder_path = "New_Data_Analytics/"
110
- extracted_files = extract_files_from_folder(folder_path)
111
-
112
  text = (
113
  get_text_from_pdf(extracted_files["pdf"]) +
114
  read_text_from_files(extracted_files["txt"]) +
@@ -116,13 +59,31 @@ def chatbot_interface(question):
116
  get_text_from_docx(extracted_files["docx"]) +
117
  get_text_from_ipynb(extracted_files["ipynb"])
118
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  print("Final extracted text for chatbot processing:", text[:500]) # Debugging log (First 500 chars)
121
 
122
  if not text.strip():
123
  return "The folder does not contain valid PDF, TXT, CSV, DOCX, or IPYNB files. Please upload supported file types."
124
 
125
- return "Files successfully read. Processing question..."
126
 
127
  # Gradio interface
128
  demo = gr.Interface(
@@ -131,4 +92,4 @@ demo = gr.Interface(
131
  outputs=gr.Textbox(label="Answer")
132
  )
133
 
134
- demo.launch()
 
50
  print("Files found:", extracted_files) # Debugging log
51
  return extracted_files
52
 
53
+ def combine_text_from_files(extracted_files):
54
+ """Combines text from all extracted files."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  text = (
56
  get_text_from_pdf(extracted_files["pdf"]) +
57
  read_text_from_files(extracted_files["txt"]) +
 
59
  get_text_from_docx(extracted_files["docx"]) +
60
  get_text_from_ipynb(extracted_files["ipynb"])
61
  )
62
+ return text
63
+
64
+ def generate_response(question, text):
65
+ """Uses OpenAI to answer a question based on extracted text."""
66
+ response = openai.ChatCompletion.create(
67
+ model="gpt-3.5-turbo",
68
+ messages=[
69
+ {"role": "system", "content": "You are a data analytics assistant. Answer the question based on the provided document content."},
70
+ {"role": "user", "content": f"{question}\n\nBased on the following document content:\n{text[:3000]}"} # Limit to 3000 characters to avoid excessive token usage
71
+ ]
72
+ )
73
+ return response["choices"][0]["message"]["content"].strip()
74
+
75
+ def chatbot_interface(question):
76
+ folder_path = "New_Data_Analytics/"
77
+ extracted_files = extract_files_from_folder(folder_path)
78
+
79
+ text = combine_text_from_files(extracted_files)
80
 
81
  print("Final extracted text for chatbot processing:", text[:500]) # Debugging log (First 500 chars)
82
 
83
  if not text.strip():
84
  return "The folder does not contain valid PDF, TXT, CSV, DOCX, or IPYNB files. Please upload supported file types."
85
 
86
+ return generate_response(question, text)
87
 
88
  # Gradio interface
89
  demo = gr.Interface(
 
92
  outputs=gr.Textbox(label="Answer")
93
  )
94
 
95
+ demo.launch()