# CSVBot-Llama2 / app.py
# Page metadata captured with this file: author avatar "hmrizal",
# commit "Update app.py" (704342a, verified), file size 11.6 kB.
import gradio as gr
import os
import uuid
import threading
import pandas as pd
import numpy as np
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
# Process-wide model cache: "model" holds the loaded LLM (None until the first
# load) and "init_lock" is the lock taken while that load happens.
MODEL_CACHE = dict(model=None, init_lock=threading.Lock())

# Root folder under which each session gets its own sub-directory.
os.makedirs("user_data", exist_ok=True)
def initialize_model_once():
    """Return the shared LLM, loading it on the first call.

    The model is built while holding ``MODEL_CACHE["init_lock"]`` and stored
    in ``MODEL_CACHE["model"]``; later calls find the stored instance and
    return it without constructing another one.

    Generation settings are supplied through the wrapper's ``config`` dict.
    The LangChain CTransformers wrapper documents ``config`` as the way to
    hand options such as ``max_new_tokens`` to ctransformers, whereas the
    same names given as top-level keyword arguments are not declared fields
    of the wrapper.

    Returns:
        The cached ``CTransformers`` instance.
    """
    with MODEL_CACHE["init_lock"]:
        if MODEL_CACHE["model"] is None:
            # Load Phi-2 model (smaller than Mistral)
            generation_config = {
                "max_new_tokens": 512,
                "temperature": 0.1,
                "top_p": 0.9,
                "repetition_penalty": 1.1,
                "context_length": 2048,
            }
            MODEL_CACHE["model"] = CTransformers(
                model="TheBloke/phi-2-GGUF",
                model_file="phi-2.Q4_K_M.gguf",
                model_type="phi2",
                config=generation_config,
            )
    return MODEL_CACHE["model"]
class ChatBot:
    """Per-session helper that answers questions about one uploaded CSV.

    ``process_file`` loads the CSV into a DataFrame and builds an LLM chain
    that turns a question into pandas code; ``chat`` runs that chain, executes
    the generated code against the DataFrame and formats the outcome.

    Attributes are plain instance state: ``df`` (the loaded DataFrame or
    None), ``csv_info`` (filename / rows / columns / column_names),
    ``query_chain`` (the question-to-code chain or None), ``chat_history``
    (list of ``(message, response)`` tuples) and ``user_dir`` (session folder).
    """

    # Prompt sent to the model. The text is kept flush-left because every
    # character inside the literal is part of the prompt.
    _QUERY_TEMPLATE = """
Kamu adalah asisten yang mengubah pertanyaan natural language menjadi kode Python dengan pandas.
Informasi tentang DataFrame:
- Nama kolom: {column_names}
- Jumlah baris: {num_rows}
- Sample data:
{sample_data}
Pertanyaan pengguna: {question}
Ubah pertanyaan tersebut menjadi kode pandas yang bisa dijalankan. Kode harus ringkas, efisien, dan menggunakan variabel 'df'.
Berikan HANYA kode python saja, tanpa backtick, tanpa penjelasan.
Kode:
"""

    def __init__(self, session_id):
        """Create the session state and its ``user_data/<session_id>`` folder."""
        self.session_id = session_id
        self.csv_info = None
        self.df = None
        # Must exist before process_file() runs: chat() reads it in its guard,
        # and process_file() can fail after the DataFrame is already stored.
        self.query_chain = None
        self.chat_history = []
        self.user_dir = f"user_data/{session_id}"
        os.makedirs(self.user_dir, exist_ok=True)

    @staticmethod
    def _strip_code_fences(text):
        """Return ``text`` stripped, without a surrounding Markdown code fence.

        Only a leading fence line (three backticks, optionally followed by a
        language tag) and a trailing line of three backticks are removed.
        """
        lines = text.strip().splitlines()
        if lines:
            head = lines[0].strip()
            if head.startswith("```") and (head == "```" or head[3:].isalnum()):
                lines = lines[1:]
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        return "\n".join(lines).strip()

    def process_file(self, file):
        """Load the uploaded CSV and prepare the question-to-code chain.

        Args:
            file: Upload object exposing ``.name`` (a path), or a path-like
                value; ``None`` means nothing was uploaded.

        Returns:
            A user-facing status string. Failures are reported in the string
            rather than raised.
        """
        if file is None:
            return "Mohon upload file CSV terlebih dahulu."
        try:
            # Handle file from Gradio
            file_path = file.name if hasattr(file, 'name') else str(file)
            file_name = os.path.basename(file_path)

            # Load the CSV with pandas and keep a copy in the session folder.
            try:
                self.df = pd.read_csv(file_path)
                user_file_path = f"{self.user_dir}/uploaded.csv"
                self.df.to_csv(user_file_path, index=False)
                # Store CSV info
                self.csv_info = {
                    "filename": file_name,
                    "rows": self.df.shape[0],
                    "columns": self.df.shape[1],
                    "column_names": self.df.columns.tolist(),
                }
                print(f"CSV verified: {self.df.shape[0]} rows, {len(self.df.columns)} columns")
            except Exception as e:
                return f"Error membaca CSV: {str(e)}"

            # Create query translator
            try:
                llm = initialize_model_once()
                self.query_chain = LLMChain(
                    llm=llm,
                    prompt=PromptTemplate(
                        input_variables=["column_names", "num_rows", "sample_data", "question"],
                        template=self._QUERY_TEMPLATE
                    )
                )
                print("Query translator created successfully")
            except Exception as e:
                return f"Error creating query translator: {str(e)}"

            # Add file info to chat history
            file_info = f"CSV berhasil dimuat: {file_name} dengan {self.df.shape[0]} baris dan {len(self.df.columns)} kolom. Kolom: {', '.join(self.df.columns.tolist())}"
            self.chat_history.append(("System", file_info))
            return f"File CSV '{file_name}' berhasil diproses! Anda dapat mulai mengajukan pertanyaan tentang data."
        except Exception as e:
            import traceback
            print(traceback.format_exc())
            return f"Error pemrosesan file: {str(e)}"

    def chat(self, message, history):
        """Answer ``message`` about the loaded CSV.

        Questions about the file name, column names or row count are answered
        from ``csv_info``. Anything else is translated to pandas code by the
        chain, executed, and reported together with the code that ran.

        Args:
            message: The user's question.
            history: Earlier chat turns; accepted for interface compatibility
                and not used here.

        Returns:
            The response text, or an error description if something failed.
        """
        if self.df is None or self.query_chain is None:
            return "Mohon upload file CSV terlebih dahulu."
        try:
            # Handle metadata questions directly
            message_lower = message.lower()
            if "nama file" in message_lower:
                return f"Nama file CSV adalah: {self.csv_info['filename']}"
            elif "nama kolom" in message_lower:
                return f"Kolom dalam CSV: {', '.join(self.csv_info['column_names'])}"
            elif "jumlah baris" in message_lower or "berapa baris" in message_lower:
                return f"Jumlah baris dalam CSV: {self.csv_info['rows']}"

            # Get sample data for context
            sample_str = self.df.head(3).to_string()

            # Translate question to pandas code
            code_response = self.query_chain.run(
                column_names=str(self.csv_info["column_names"]),
                num_rows=self.csv_info["rows"],
                sample_data=sample_str,
                question=message
            )

            # Bind the expression to `result` so it can be read back after
            # exec(). Built before the inner try so the error message below
            # can always show the code.
            code = "result = " + self._strip_code_fences(code_response)
            try:
                # Create local context with the dataframe
                locals_dict = {"df": self.df, "pd": pd, "np": np}
                print(f"Executing code: {code}")
                # NOTE(security): this runs model-generated code, which is
                # influenced by the user's message and the CSV contents.
                # exec() adds __builtins__ to the globals dict, so this is
                # not a sandbox; isolate the process before exposing publicly.
                exec(code, {"pd": pd, "np": np}, locals_dict)
                result = locals_dict.get("result", "No result returned")

                # Format the result; long DataFrames are cut to five rows.
                if isinstance(result, pd.DataFrame):
                    if len(result) > 5:
                        result_str = result.head(5).to_string() + f"\n\n[{len(result)} baris ditemukan]"
                    else:
                        result_str = result.to_string()
                else:
                    result_str = str(result)

                # Build the response
                response = f"Hasil analisis untuk pertanyaan: '{message}'\n\n"
                response += f"Kode yang digunakan:\n```python\n{code}\n```\n\n"
                response += f"Output:\n{result_str}"
                self.chat_history.append((message, response))
                return response
            except Exception as e:
                error_msg = f"Error mengeksekusi kode: {str(e)}\nKode yang dihasilkan:\n```python\n{code}\n```"
                print(error_msg)
                return error_msg
        except Exception as e:
            import traceback
            print(traceback.format_exc())
            return f"Error: {str(e)}"
# UI Code
def create_gradio_interface():
    """Assemble the Gradio Blocks UI and return it without launching.

    Layout: a left column with the CSV upload, a process button and example
    questions; a right column with the chat view, the question box and the
    send / clear buttons. Per-session state holds a generated session id and
    the session's ChatBot (None until one is created).
    """
    with gr.Blocks(title="CSV Data Analyzer") as interface:
        session_id = gr.State(lambda: str(uuid.uuid4()))
        chatbot_state = gr.State(lambda: None)

        gr.HTML("<h1 style='text-align: center;'>CSV Data Analyzer</h1>")
        gr.HTML("<h3 style='text-align: center;'>Ajukan pertanyaan tentang data CSV Anda</h3>")

        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(label="Upload CSV Anda", file_types=[".csv"])
                process_button = gr.Button("Proses CSV")
                with gr.Accordion("Contoh Pertanyaan", open=False):
                    gr.Markdown("""
- "Berapa jumlah data yang memiliki nilai Glucose di atas 150?"
- "Bagaimana distribusi kolom Age?"
- "Hitung nilai rata-rata dan standar deviasi untuk setiap kolom numerik"
- "Buat tabel frekuensi untuk kolom Outcome"
""")
            with gr.Column(scale=2):
                chatbot_interface = gr.Chatbot(label="Riwayat Chat", height=400)
                message_input = gr.Textbox(
                    label="Ketik pertanyaan Anda",
                    placeholder="Contoh: Berapa jumlah data yang memiliki nilai Glucose di atas 150?",
                    lines=2,
                )
                submit_button = gr.Button("Kirim")
                clear_button = gr.Button("Bersihkan Chat")

        # --- Event handlers -------------------------------------------------
        # Handler names are left as they are: Gradio can derive endpoint names
        # from them.

        def handle_process_file(file, sess_id):
            # A fresh ChatBot per upload; its status message becomes the
            # only entry shown in the chat view.
            chatbot = ChatBot(sess_id)
            return chatbot, [(None, chatbot.process_file(file))]

        process_button.click(
            fn=handle_process_file,
            inputs=[file_input, session_id],
            outputs=[chatbot_state, chatbot_interface],
        )

        def user_message_submitted(message, history, chatbot, sess_id):
            # Show the question immediately with an empty answer slot and
            # clear the text box; state values pass through unchanged.
            return history + [(message, None)], "", chatbot, sess_id

        def bot_response(history, chatbot, sess_id):
            # Fill the answer slot of the last turn.
            if chatbot is None:
                chatbot = ChatBot(sess_id)
                reply = "Mohon upload file CSV terlebih dahulu."
            else:
                reply = chatbot.chat(history[-1][0], history[:-1])
            history[-1] = (history[-1][0], reply)
            return chatbot, history

        # The send button and pressing Enter in the text box run the same
        # two-step chain: echo the question, then compute the answer.
        for register in (submit_button.click, message_input.submit):
            register(
                fn=user_message_submitted,
                inputs=[message_input, chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_interface, message_input, chatbot_state, session_id],
            ).then(
                fn=bot_response,
                inputs=[chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_state, chatbot_interface],
            )

        def handle_clear_chat(chatbot):
            # Reset the bot's own history (if a bot exists) and empty the view.
            if chatbot is not None:
                chatbot.chat_history = []
            return chatbot, []

        clear_button.click(
            fn=handle_clear_chat,
            inputs=[chatbot_state],
            outputs=[chatbot_state, chatbot_interface],
        )

    return interface
# Script entry point: build the UI and serve it with a public share link.
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(share=True)