gradio_test / trainingText.py
ajsbsd's picture
qwen.ai helper
f52daa3
#!//home/aaron/gradio_test/bin/python
### ✅ Example: Pull Random Records Based on Dataset Size
#
#Here's a complete Python example using Hugging Face's `datasets` library:
#
from datasets import load_dataset
import random
# Seed the module-level RNG so repeated runs over the same dataset size
# produce the same ordering (optional; remove for a different order each run).
random.seed(42)

# Load the dataset from Hugging Face and select its "train" split.
dataset = load_dataset("ajsbsd/14400")
train_dataset = dataset["train"]

# Report the split size; it also bounds how many records can be sampled.
total_records = len(train_dataset)
print(f"Total records in dataset: {total_records}\n")

# Number of records to display. Defaults to the whole split, so every record
# is printed exactly once in shuffled order. To prompt interactively instead:
#     num_samples = int(input("How many random records would you like to see? "))
num_samples = total_records

if total_records == 0:
    # An empty split would otherwise fall through to the range hint below and
    # print "between 1 and 0", which is impossible to satisfy.
    print("Dataset is empty; there are no records to sample.")
elif num_samples <= 0 or num_samples > total_records:
    print(f"Please enter a number between 1 and {total_records}.")
else:
    # random.sample draws without replacement, so no index is repeated.
    random_indices = random.sample(range(total_records), num_samples)

    # i is a 1-based display counter; idx is the row index into the split.
    for i, idx in enumerate(random_indices, 1):
        record = train_dataset[idx]
        print(f"--- Record #{i} (Index: {idx}) ---")
        print(f"ID: {record['id']}")
        print(f"Text:\n{record['text']}\n")
### 🧠 What This Does
#
# Loads the dataset
# Gets the total number of records automatically
# Uses every record by default (the interactive prompt for a sample count is commented out)
# Picks that many random rows and prints them
#
### 🔍 Example Run
#
#Total records in dataset: 256
#
#How many random records would you like to see? 5
#
#--- Record #1 (Index: 203) ---
#ID: 204
#Text:
#It was the...
#
#--- Record #2 (Index: 15) ---
#ID: 16
#Text:
#The period...
#
#
### 📌 Want to Do This Without User Input?
#
#You can hardcode the number of samples:
#
#num_samples = 5
#
#Or make it part of a function:
#
#
#def get_random_samples(dataset, num_samples):
#    total = len(dataset)
#    indices = random.sample(range(total), num_samples)
#    return [dataset[i] for i in indices]