File size: 1,911 Bytes
f52daa3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!//home/aaron/gradio_test/bin/python
### ✅ Example: Pull Random Records Based on Dataset Size
#
#Here’s a complete Python example using Hugging Face's `datasets` library:
#
from datasets import load_dataset
import random

# Fix the RNG seed so repeated runs draw the same order (optional)
random.seed(42)

# Fetch the dataset from Hugging Face and select its "train" split
dataset = load_dataset("ajsbsd/14400")
train_dataset = dataset["train"]

# Report how many records the split holds
total_records = len(train_dataset)
print(f"Total records in dataset: {total_records}\n")

# Number of records to draw. The interactive prompt below is disabled, so
# the sample size defaults to the whole split (every record, random order).
#num_samples = int(input("How many random records would you like to see? "))
num_samples = total_records

# Sample only when the requested count fits within the split
if 0 < num_samples <= total_records:
    # Draw distinct row positions without replacement
    random_indices = random.sample(range(total_records), num_samples)

    # Show each drawn record, numbered from 1
    for i, idx in enumerate(random_indices, start=1):
        record = train_dataset[idx]
        print(f"--- Record #{i} (Index: {idx}) ---")
        print(f"ID: {record['id']}")
        print(f"Text:\n{record['text']}\n")
else:
    # Out-of-range request (this also covers an empty split)
    print(f"Please enter a number between 1 and {total_records}.")

### 🧠 What This Does
#
# Loads the dataset
# Gets the total number of records automatically
# Uses the total record count as the sample size (the interactive prompt is commented out)
# Picks that many random rows and prints them
#
### 🔁 Example Run (with the interactive prompt enabled)
#
#Total records in dataset: 256
#
#How many random records would you like to see? 5
#
#--- Record #1 (Index: 203) ---
#ID: 204
#Text:
#It was the...
#
#--- Record #2 (Index: 15) ---
#ID: 16
#Text:
#The period...
#
#
### 📌 Want to Do This Without User Input?
#
#You can hardcode the number of samples:
#
#num_samples = 5
#
#Or make it part of a function:
#
#
#def get_random_samples(dataset, num_samples):
#    total = len(dataset)
#    indices = random.sample(range(total), num_samples)
#    return [dataset[i] for i in indices]