Spaces:
Running
Running
File size: 1,911 Bytes
f52daa3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
#!/home/aaron/gradio_test/bin/python
### ✅ Example: Pull Random Records Based on Dataset Size
#
#Here’s a complete Python example using Hugging Face's `datasets` library:
#
from datasets import load_dataset
import random
# Hugging Face dataset repository to sample from.
DATASET_NAME = "ajsbsd/14400"

# Fixed seed so repeated runs print the records in the same order.
RANDOM_SEED = 42


def pick_random_indices(total_records, num_samples):
    """Return ``num_samples`` distinct row indices drawn at random.

    Args:
        total_records: Number of rows available; indices are drawn from
            ``range(total_records)``.
        num_samples: How many distinct indices to draw.

    Returns:
        A list of ``num_samples`` unique integers in random order.

    Raises:
        ValueError: If ``num_samples`` is negative or larger than
            ``total_records`` (raised by ``random.sample``).
    """
    return random.sample(range(total_records), num_samples)


def main(num_samples=None):
    """Print randomly chosen records from the dataset's ``train`` split.

    Each record is printed with its 1-based position in the output, its
    index in the dataset, and its ``id`` and ``text`` fields.

    Args:
        num_samples: How many records to print. ``None`` (the default)
            prints every record, in random order.
    """
    # Seed before anything else so the sampled order is reproducible.
    random.seed(RANDOM_SEED)

    dataset = load_dataset(DATASET_NAME)
    train_dataset = dataset["train"]

    total_records = len(train_dataset)
    print(f"Total records in dataset: {total_records}\n")

    if total_records == 0:
        # Without this guard the range check below would print the
        # nonsensical "between 1 and 0" message.
        print("Dataset is empty; nothing to sample.")
        return

    if num_samples is None:
        num_samples = total_records

    if num_samples <= 0 or num_samples > total_records:
        print(f"Please enter a number between 1 and {total_records}.")
        return

    random_indices = pick_random_indices(total_records, num_samples)
    for i, idx in enumerate(random_indices, 1):
        # Fetch one row at a time instead of materializing all of them.
        record = train_dataset[idx]
        print(f"--- Record #{i} (Index: {idx}) ---")
        print(f"ID: {record['id']}")
        print(f"Text:\n{record['text']}\n")


# Run only when executed as a script, so importing this module does not
# trigger a dataset download.
if __name__ == "__main__":
    main()
### 🧠 What This Does
#
# Loads the dataset
# Gets the total number of records automatically
# Chooses how many random entries to show (by default, all of them)
# Picks that many random rows and prints them
#
### 🔁 Example Run
#
#Total records in dataset: 256
#
#How many random records would you like to see? 5
#
#--- Record #1 (Index: 203) ---
#ID: 204
#Text:
#It was the...
#
#--- Record #2 (Index: 15) ---
#ID: 16
#Text:
#The period...
#
#
### 📌 Want to Do This Without User Input?
#
#You can hardcode the number of samples:
#
#num_samples = 5
#
#Or make it part of a function:
#
#
#def get_random_samples(dataset, num_samples):
# total = len(dataset)
# indices = random.sample(range(total), num_samples)
# return [dataset[i] for i in indices]
|