Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,23 +1,43 @@
|
|
1 |
from datasets import load_dataset
|
2 |
import pandas as pd
|
3 |
-
|
4 |
import os
|
5 |
-
#os.system("pip install git+https://github.com/apple/embedding-atlas.git ")
|
6 |
-
#from embedding_atlas.widget import EmbeddingAtlasWidget
|
7 |
|
8 |
-
#
|
|
|
|
|
9 |
dataset_name = "Trendyol/Trendyol-Cybersecurity-Instruction-Tuning-Dataset"
|
10 |
|
11 |
-
#
|
12 |
-
|
13 |
-
|
|
|
|
|
14 |
|
15 |
-
#
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
df = dataset.to_pandas()
|
18 |
|
19 |
-
#
|
20 |
-
#
|
21 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
os.system(
|
|
|
1 |
from datasets import load_dataset
|
2 |
import pandas as pd
|
|
|
3 |
import os
|
|
|
|
|
4 |
|
5 |
+
# --- Configuration ---
|
6 |
+
|
7 |
+
# 1. Hardcode the name of the Hugging Face dataset
|
8 |
dataset_name = "Trendyol/Trendyol-Cybersecurity-Instruction-Tuning-Dataset"
|
9 |
|
10 |
+
# 2. Define the name for the local file where the data will be saved
|
11 |
+
local_file_path = "trendyol_cybersecurity_dataset.csv"
|
12 |
+
|
13 |
+
# 3. Define the port for the Embedding Atlas server
|
14 |
+
port = 7860
|
15 |
|
16 |
+
# --- Script Logic ---
|
17 |
+
|
18 |
+
# Step 1: Load the dataset from Hugging Face
|
19 |
+
print(f"Loading dataset '{dataset_name}' from the Hub...")
|
20 |
+
try:
|
21 |
+
dataset = load_dataset(dataset_name, split="train")
|
22 |
+
except Exception as e:
|
23 |
+
print(f"Failed to load dataset. Error: {e}")
|
24 |
+
exit()
|
25 |
+
|
26 |
+
# Step 2: Convert the dataset to a Pandas DataFrame
|
27 |
+
print("Converting dataset to Pandas DataFrame...")
|
28 |
df = dataset.to_pandas()
|
29 |
|
30 |
+
# Step 3: Save the DataFrame to a local CSV file
|
31 |
+
# This is the crucial step. The CLI tool will read from this file.
|
32 |
+
# We use index=False to avoid saving the pandas index as an extra column.
|
33 |
+
print(f"Saving DataFrame to a local file: '{local_file_path}'")
|
34 |
+
df.to_csv(local_file_path, index=False)
|
35 |
+
print("Save complete.")
|
36 |
+
|
37 |
+
# Step 4: Construct and run the CLI command using the LOCAL file path
|
38 |
+
command = f"embedding-atlas {local_file_path} --port {port}"
|
39 |
+
print(f"\nLaunching Embedding Atlas...")
|
40 |
+
print(f"Running command: {command}")
|
41 |
+
print(f"Access the UI in your browser at: http://127.0.0.1:{port}")
|
42 |
|
43 |
+
os.system(command)
|