broadfield-dev committed on
Commit
8904c8e
·
verified ·
1 Parent(s): c5a0831

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -13
app.py CHANGED
@@ -1,23 +1,43 @@
1
  from datasets import load_dataset
2
  import pandas as pd
3
-
4
  import os
5
- #os.system("pip install git+https://github.com/apple/embedding-atlas.git ")
6
- #from embedding_atlas.widget import EmbeddingAtlasWidget
7
 
8
- # Hardcode the name of the Hugging Face dataset
 
 
9
  dataset_name = "Trendyol/Trendyol-Cybersecurity-Instruction-Tuning-Dataset"
10
 
11
- # Load the dataset from Hugging Face
12
- # The load_dataset function can download datasets from the Hub. [3, 5, 6]
13
- dataset = load_dataset(dataset_name, split="train")
 
 
14
 
15
- # Convert the dataset to a Pandas DataFrame
16
- # The .to_pandas() method is a convenient way to do this.
 
 
 
 
 
 
 
 
 
 
17
  df = dataset.to_pandas()
18
 
19
- # Show the Embedding Atlas widget for your data frame
20
- # The widget takes a Pandas DataFrame as its primary input. [4]
21
- #EmbeddingAtlasWidget(df)
 
 
 
 
 
 
 
 
 
22
 
23
- os.system(f"embedding-atlas {df} --port 7860")
 
1
  from datasets import load_dataset
2
  import pandas as pd
 
3
  import os
 
 
4
 
5
+ # --- Configuration ---
6
+
7
+ # 1. Hardcode the name of the Hugging Face dataset
8
  dataset_name = "Trendyol/Trendyol-Cybersecurity-Instruction-Tuning-Dataset"
9
 
10
+ # 2. Define the name for the local file where the data will be saved
11
+ local_file_path = "trendyol_cybersecurity_dataset.csv"
12
+
13
+ # 3. Define the port for the Embedding Atlas server
14
+ port = 7860
15
 
16
+ # --- Script Logic ---
17
+
18
+ # Step 1: Load the dataset from Hugging Face
19
+ print(f"Loading dataset '{dataset_name}' from the Hub...")
20
+ try:
21
+ dataset = load_dataset(dataset_name, split="train")
22
+ except Exception as e:
23
+ print(f"Failed to load dataset. Error: {e}")
24
+ exit()
25
+
26
+ # Step 2: Convert the dataset to a Pandas DataFrame
27
+ print("Converting dataset to Pandas DataFrame...")
28
  df = dataset.to_pandas()
29
 
30
+ # Step 3: Save the DataFrame to a local CSV file
31
+ # This is the crucial step. The CLI tool will read from this file.
32
+ # We use index=False to avoid saving the pandas index as an extra column.
33
+ print(f"Saving DataFrame to a local file: '{local_file_path}'")
34
+ df.to_csv(local_file_path, index=False)
35
+ print("Save complete.")
36
+
37
+ # Step 4: Construct and run the CLI command using the LOCAL file path
38
+ command = f"embedding-atlas {local_file_path} --port {port}"
39
+ print(f"\nLaunching Embedding Atlas...")
40
+ print(f"Running command: {command}")
41
+ print(f"Access the UI in your browser at: http://127.0.0.1:{port}")
42
 
43
+ os.system(command)