# Spaces: Running
#!//home/aaron/gradio_test/bin/python | |
### Example: Pull Random Records Based on Dataset Size
#
# Here's a complete Python example using Hugging Face's `datasets` library:
#
import random

from datasets import load_dataset
# Fixed seed so repeated runs pick the same "random" records (optional).
RANDOM_SEED = 42
# Hugging Face dataset repository to sample from.
DATASET_NAME = "ajsbsd/14400"


def is_valid_sample_size(num_samples, total_records):
    """Return True when ``num_samples`` lies between 1 and ``total_records``.

    Args:
        num_samples: Requested number of random records.
        total_records: Number of records available in the dataset.

    Returns:
        ``True`` if ``1 <= num_samples <= total_records``, else ``False``.
    """
    return 0 < num_samples <= total_records


def pick_random_indices(total_records, num_samples):
    """Return ``num_samples`` distinct random indices from ``range(total_records)``.

    Args:
        total_records: Size of the index range to sample from.
        num_samples: How many distinct indices to draw.

    Returns:
        A list of unique indices in random order.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``num_samples`` is
            negative or larger than ``total_records``.
    """
    return random.sample(range(total_records), num_samples)


def format_record(position, index, record):
    """Build the text printed for one sampled record.

    Args:
        position: 1-based position of the record in the printed output.
        index: Index of the record inside the dataset split.
        record: Mapping that provides the ``'id'`` and ``'text'`` fields.

    Returns:
        The header line, the ID line and the text, each terminated by a
        newline except the last one that ``print`` adds itself.
    """
    return (
        f"--- Record #{position} (Index: {index}) ---\n"
        f"ID: {record['id']}\n"
        f"Text:\n{record['text']}\n"
    )


def main():
    """Load the dataset's ``train`` split and print randomly chosen records."""
    # Seed first so the selection below is reproducible between runs.
    random.seed(RANDOM_SEED)

    # Load dataset from Hugging Face.
    dataset = load_dataset(DATASET_NAME)
    train_dataset = dataset["train"]

    # Get total number of records.
    total_records = len(train_dataset)
    print(f"Total records in dataset: {total_records}\n")

    # An empty split has no valid sample size at all; say so explicitly
    # instead of asking for "a number between 1 and 0".
    if total_records == 0:
        print("Dataset is empty; nothing to sample.")
        return

    # The interactive prompt is disabled; every record is shown, shuffled.
    # To ask the user instead, replace the assignment below with:
    #   num_samples = int(input("How many random records would you like to see? "))
    num_samples = total_records

    # Ensure valid input.
    if not is_valid_sample_size(num_samples, total_records):
        print(f"Please enter a number between 1 and {total_records}.")
        return

    # Print random records.
    random_indices = pick_random_indices(total_records, num_samples)
    for position, index in enumerate(random_indices, 1):
        print(format_record(position, index, train_dataset[index]))


if __name__ == "__main__":
    main()
### What This Does
#
# - Loads the dataset
# - Gets the total number of records automatically
# - Chooses how many random entries to show (currently all of them; the
#   interactive prompt is commented out)
# - Picks that many random rows and prints them
#
### Example Run (with the interactive prompt enabled)
#
# Total records in dataset: 256
#
# How many random records would you like to see? 5
#
# --- Record #1 (Index: 203) ---
# ID: 204
# Text:
# It was the...
#
# --- Record #2 (Index: 15) ---
# ID: 16
# Text:
# The period...
#
#
### Want to Do This Without User Input?
#
# You can hardcode the number of samples:
#
#     num_samples = 5
#
# Or make it part of a function:
#
#
#     def get_random_samples(dataset, num_samples):
#         total = len(dataset)
#         indices = random.sample(range(total), num_samples)
#         return [dataset[i] for i in indices]