vtrubamacrocosmos commited on
Commit
a000445
·
verified ·
1 Parent(s): aa57274

Update app.py

Browse files

add new sources

Files changed (1) hide show
  1. app.py +25 -4
app.py CHANGED
@@ -60,16 +60,37 @@ st.markdown("Explore massive datasets hosted on Hugging Face, totaling approxima
60
  @st.cache_data
61
  def load_datasets():
62
  return [
63
- {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/icedwind/x_dataset_19", "Number of rows": "332 MLN"},
64
- {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/wenknow/x_dataset", "Number of rows": "9.9 K"},
65
- {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/arrmlet/reddit_dataset_123", "Number of rows": "89 K"},
 
 
 
 
 
 
 
 
66
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/dataverse-scraping/reddit_dataset_192", "Number of rows": "249 MLN"},
67
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/icedwind/reddit_dataset_226", "Number of rows": "303 MLN"},
68
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/arrmlet/reddit_dataset_123", "Number of rows": "1.12 MLN"},
69
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/chris241/reddit_dataset_75", "Number of rows": "132 MLN"},
70
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/wenknow/reddit_dataset_242", "Number of rows": "130 MLN"},
71
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/mgrtsv/reddit_dataset_231", "Number of rows": "31.2 MLN"},
72
- {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/PlanAPlanB/reddit_dataset_9", "Number of rows": "26.9 MLN"}
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  ]
74
 
75
  # Load datasets
 
60
  @st.cache_data
61
  def load_datasets():
62
  return [
63
+ # Reddit datasets
64
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/PlanAPlanB/reddit_dataset_69", "Number of rows": "6 MLN"},
65
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/mgrtsv/reddit_dataset_229", "Number of rows": "44,815,182"},
66
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/wenknow/reddit_dataset_88", "Number of rows": "253,506,882"},
67
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/PlanAPlanB/reddit_dataset_218", "Number of rows": "562,042"},
68
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/PlanAPlanB/reddit_dataset_13", "Number of rows": "18,931,749"},
69
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/chris241/reddit_dataset_219", "Number of rows": "227,599,340"},
70
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/icedwind/reddit_dataset_112", "Number of rows": "301,588,714"},
71
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/dataverse-scraping/reddit_dataset_71", "Number of rows": "259,924,884"},
72
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/wenknow/reddit_dataset_209", "Number of rows": "209,698,975"},
73
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/arrmlet/reddit_dataset_218", "Number of rows": "7,064,613"},
74
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/dataverse-scraping/reddit_dataset_192", "Number of rows": "249 MLN"},
75
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/icedwind/reddit_dataset_226", "Number of rows": "303 MLN"},
76
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/arrmlet/reddit_dataset_123", "Number of rows": "1.12 MLN"},
77
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/chris241/reddit_dataset_75", "Number of rows": "132 MLN"},
78
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/wenknow/reddit_dataset_242", "Number of rows": "130 MLN"},
79
  {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/mgrtsv/reddit_dataset_231", "Number of rows": "31.2 MLN"},
80
+ {"Source": "Reddit", "DataSet repo link": "https://huggingface.co/datasets/PlanAPlanB/reddit_dataset_9", "Number of rows": "26.9 MLN"},
81
+
82
+ # X datasets
83
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/littleGuagua/x_dataset_0", "Number of rows": "331,611,777"},
84
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/suul999922/x_dataset_71", "Number of rows": "8,998,828"},
85
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/thayallans/x_dataset_28", "Number of rows": "178,669"},
86
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/apidojo/x_dataset_242", "Number of rows": "499,067"},
87
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/icedwind/x_dataset_112", "Number of rows": "331,500,777"},
88
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/arrmlet/x_dataset_218", "Number of rows": "1,753,878"},
89
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/SAVE0x0/x_dataset_191", "Number of rows": "92,588"},
90
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/johnny8188/x_dataset_187", "Number of rows": "52,762"},
91
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/icedwind/x_dataset_19", "Number of rows": "332 MLN"},
92
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/wenknow/x_dataset", "Number of rows": "9.9 K"},
93
+ {"Source": "X", "DataSet repo link": "https://huggingface.co/datasets/arrmlet/reddit_dataset_123", "Number of rows": "89 K"}
94
  ]
95
 
96
  # Load datasets