# NOTE: "Spaces: Sleeping" banner removed — it was a Hugging Face Spaces
# status header captured when this file was scraped, not part of the program.
import json
import os
from pathlib import Path

import docx
import gradio as gr
import openai
import pandas as pd
import PyPDF2
import requests
from docx import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.llms import OpenAI
from langchain_community.vectorstores import FAISS
def detect_language(text):
    """Identify the language of *text* by asking the OpenAI chat API.

    Args:
        text: Arbitrary input text whose language should be detected.

    Returns:
        The model's reply (expected to name the language), stripped of
        surrounding whitespace.
    """
    prompt = [
        {"role": "system", "content": "Detect the language of this text."},
        {"role": "user", "content": text},
    ]
    api_result = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=prompt)
    return api_result["choices"][0]["message"]["content"].strip()
# Configure the OpenAI API key. Read it from the OPENAI_API_KEY environment
# variable so the real secret is never committed to source control; the
# original literal placeholder is kept only as a last-resort fallback.
openai.api_key = os.getenv("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
def extract_files_from_folder(folder_path):
    """Recursively collect supported files under *folder_path*.

    Walks the folder and every subfolder, grouping file paths by their
    extension. Extensions are matched case-insensitively, so ``.PDF`` and
    ``.TXT`` are found as well (the original chain of ``endswith`` checks
    silently skipped upper-case extensions).

    Args:
        folder_path: Root directory to scan.

    Returns:
        Dict mapping each supported extension ("pdf", "txt", "csv",
        "docx", "ipynb") to a list of matching file paths.
    """
    extracted_files = {"pdf": [], "txt": [], "csv": [], "docx": [], "ipynb": []}
    print(f"Scanning folder: {folder_path}")
    for root, _subdirs, files in os.walk(folder_path):
        print(f"Checking folder: {root}")  # Debugging log for subfolders
        for file_name in files:
            file_path = os.path.join(root, file_name)
            print(f"Found file: {file_path}")
            # Normalize the extension once instead of a chain of endswith checks.
            ext = Path(file_name).suffix.lower().lstrip(".")
            if ext in extracted_files:
                extracted_files[ext].append(file_path)
    print("Files found:", extracted_files)  # Debugging log
    return extracted_files
def combine_text_from_files(extracted_files):
    """Concatenate the text extracted from every supported file group.

    Args:
        extracted_files: Mapping of extension -> list of file paths, as
            produced by ``extract_files_from_folder``.

    Returns:
        A single string: the PDF, TXT, CSV, DOCX and IPYNB contents
        appended together in that order.
    """
    readers = (
        (get_text_from_pdf, "pdf"),
        (read_text_from_files, "txt"),
        (get_text_from_csv, "csv"),
        (get_text_from_docx, "docx"),
        (get_text_from_ipynb, "ipynb"),
    )
    return "".join(reader(extracted_files[key]) for reader, key in readers)
def generate_response(question, text):
    """Answer *question* with gpt-3.5-turbo, grounded in the document text.

    Args:
        question: The user's question.
        text: Combined document content to ground the answer in.

    Returns:
        The model's answer, stripped of surrounding whitespace.
    """
    context_snippet = text[:3000]  # Limit to 3000 characters to avoid excessive token usage
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a data analytics assistant. Answer the question based on the provided document content.",
            },
            {
                "role": "user",
                "content": f"{question}\n\nBased on the following document content:\n{context_snippet}",
            },
        ],
    )
    return chat["choices"][0]["message"]["content"].strip()
def chatbot_interface(question):
    """Gradio callback: answer *question* from files under New_Data_Analytics/.

    Scans the fixed folder, combines the extracted text, and either asks
    the model or returns a user-facing message when no supported files
    were found.
    """
    extracted_files = extract_files_from_folder("New_Data_Analytics/")
    text = combine_text_from_files(extracted_files)
    print("Final extracted text for chatbot processing:", text[:500])  # Debugging log (First 500 chars)
    if text.strip():
        return generate_response(question, text)
    return "The folder does not contain valid PDF, TXT, CSV, DOCX, or IPYNB files. Please upload supported file types."
# Gradio interface: a single question box wired to the chatbot callback.
question_box = gr.Textbox(label="Ask a question", placeholder="Type your question here...")
answer_box = gr.Textbox(label="Answer")
demo = gr.Interface(fn=chatbot_interface, inputs=question_box, outputs=answer_box)
demo.launch()