# Embedding-Atlas / app.py
# Uploaded by broadfield-dev; commit 8904c8e (verified): "Update app.py"; 1.43 kB.
import os
import subprocess
import sys

import pandas as pd
from datasets import load_dataset
# --- Configuration ---
# Name of the Hugging Face dataset to load (hardcoded on purpose).
dataset_name: str = "Trendyol/Trendyol-Cybersecurity-Instruction-Tuning-Dataset"
# Local CSV file the dataset is written to before the CLI is launched.
local_file_path: str = "trendyol_cybersecurity_dataset.csv"
# Port handed to the Embedding Atlas server via --port.
port: int = 7860
# --- Script Logic ---
# Pipeline: load the dataset from the Hub, convert it to a DataFrame, write it
# to a local CSV, then launch the `embedding-atlas` CLI on that CSV.

# Step 1: Load the dataset from Hugging Face
print(f"Loading dataset '{dataset_name}' from the Hub...")
try:
    dataset = load_dataset(dataset_name, split="train")
except Exception as e:
    # Top-level boundary of the script: report the failure and stop.
    print(f"Failed to load dataset. Error: {e}")
    # Exit with a non-zero status so callers can detect the failure
    # (a bare exit() would report success).
    sys.exit(1)

# Step 2: Convert the dataset to a Pandas DataFrame
print("Converting dataset to Pandas DataFrame...")
df = dataset.to_pandas()

# Step 3: Save the DataFrame to a local CSV file
# This is the crucial step. The CLI tool will read from this file.
# We use index=False to avoid saving the pandas index as an extra column.
print(f"Saving DataFrame to a local file: '{local_file_path}'")
df.to_csv(local_file_path, index=False)
print("Save complete.")

# Step 4: Construct and run the CLI command using the LOCAL file path
# The command is passed as an argument list (no shell), so a file path that
# contains spaces or shell metacharacters reaches the CLI unchanged.
command_args = ["embedding-atlas", local_file_path, "--port", str(port)]
# Human-readable form of the command, used only for the log line below.
command = " ".join(command_args)
print("\nLaunching Embedding Atlas...")
print(f"Running command: {command}")
print(f"Access the UI in your browser at: http://127.0.0.1:{port}")
try:
    completed = subprocess.run(command_args, check=False)
except FileNotFoundError:
    # Raised when the executable cannot be located on PATH.
    print("Could not find the 'embedding-atlas' executable. Is it installed and on PATH?")
    sys.exit(1)
# Propagate a failing exit status from the CLI instead of discarding it.
if completed.returncode != 0:
    sys.exit(completed.returncode)