"""Export a Hugging Face dataset to CSV and open it in Embedding Atlas.

The script loads the ``train`` split of ``dataset_name``, writes it to
``local_file_path`` as CSV, and then starts the ``embedding-atlas``
command-line tool on that file, passing ``--port``.
"""

import os  # kept from the original file; no longer used by the launch step
import shlex
import subprocess
import sys

import pandas as pd  # kept from the original file; not referenced directly
from datasets import load_dataset

# --- Configuration ---

# 1. Name of the Hugging Face dataset to load.
dataset_name = "Trendyol/Trendyol-Cybersecurity-Instruction-Tuning-Dataset"

# 2. Local file the data is written to before the CLI is started.
local_file_path = "trendyol_cybersecurity_dataset.csv"

# 3. Port passed to the Embedding Atlas CLI via --port.
port = 7860


# --- Script Logic ---

def main() -> int:
    """Run the load -> export -> launch pipeline.

    Returns:
        The process exit status: 1 if the dataset could not be loaded,
        127 if the ``embedding-atlas`` executable was not found, 0 if the
        run was interrupted with Ctrl-C, otherwise the exit status of the
        ``embedding-atlas`` process.
    """
    # Step 1: Load the dataset from Hugging Face.
    print(f"Loading dataset '{dataset_name}' from the Hub...")
    try:
        dataset = load_dataset(dataset_name, split="train")
    except Exception as e:  # top-level boundary: report and stop
        print(f"Failed to load dataset. Error: {e}")
        # Non-zero status so shells and CI can see that the run failed.
        return 1

    # Step 2: Convert the dataset to a Pandas DataFrame.
    print("Converting dataset to Pandas DataFrame...")
    df = dataset.to_pandas()

    # Step 3: Save the DataFrame to a local CSV file; the CLI is pointed at
    # this file. index=False avoids writing the pandas index as an extra
    # column.
    print(f"Saving DataFrame to a local file: '{local_file_path}'")
    df.to_csv(local_file_path, index=False)
    print("Save complete.")

    # Step 4: Build the CLI command as an argument list so that no shell is
    # involved and the file path is passed through verbatim.
    command = ["embedding-atlas", local_file_path, "--port", str(port)]
    print("\nLaunching Embedding Atlas...")
    print(f"Running command: {shlex.join(command)}")
    print(f"Access the UI in your browser at: http://127.0.0.1:{port}")

    try:
        completed = subprocess.run(command, check=False)
    except FileNotFoundError:
        print(
            "Could not find the 'embedding-atlas' executable on PATH.",
            file=sys.stderr,
        )
        # 127 is the conventional "command not found" status.
        return 127
    except KeyboardInterrupt:
        # Ctrl-C is the usual way to stop the server; exit without a traceback.
        return 0
    return completed.returncode


if __name__ == "__main__":
    sys.exit(main())