patrickvonplaten committed on
Commit
d3dd1a7
·
1 Parent(s): 56f4087
Files changed (1) hide show
  1. upload.py +47 -0
upload.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Collect image URLs posted in a Space's discussions and upload them as JSON.

For every discussion of the Space ``repo_id`` the script reads the opening
event, extracts the ``src`` of each ``<img>`` tag found in it, and records the
discussion title as the prompt.  Discussions without any image are skipped.
The resulting list is serialized to ``<owner>_<name>.json`` and uploaded to
the dataset repository ``dataset_repo_id``.
"""
import json
import os

import huggingface_hub
from bs4 import BeautifulSoup
from huggingface_hub import HfApi
from huggingface_hub import get_repo_discussions
from huggingface_hub import hf_hub_download

# Space whose discussions are scraped.
repo_id = "stabilityai/stable-diffusion"
# Alternative source Space:
# repo_id = "huggingface-projects/diffuse-the-rest"
# Dataset repository that receives the JSON file.
dataset_repo_id = "triple-t/dummy"


def _extract_image_urls(raw_html):
    """Return the ``src`` of every ``<img>`` tag in *raw_html*, in order.

    Tags without a ``src`` attribute are ignored so the output never contains
    ``None`` entries.
    """
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed, so results (and a warning) depend on the environment.
    page = BeautifulSoup(raw_html, "html.parser")
    sources = (tag.get("src") for tag in page.find_all("img"))
    return [src for src in sources if src]


print("retrieve images...")
all_data = []
for disc in get_repo_discussions(repo_id=repo_id, repo_type="space"):
    details = huggingface_hub.get_discussion_details(
        repo_id=repo_id, repo_type="space", discussion_num=disc.num
    )
    if not details.events:
        # Nothing to parse for this discussion.
        continue

    # The first event is expected to be the opening comment; its public
    # ``content`` attribute replaces the former lookup through the private
    # ``_event["data"]["latest"]["raw"]`` payload.
    image_urls = _extract_image_urls(details.events[0].content)
    if not image_urls:
        continue

    all_data.append(
        {
            # Use the discussion's own number rather than its position in the
            # listing, which shifts whenever discussions are added.
            "discussion_number": details.num,
            "data": {
                "prompt": details.title,
                "images": image_urls,
            },
        }
    )


file_name = "_".join(repo_id.split("/")) + ".json"
api = HfApi()

# Upload the serialized data directly from memory.  The previous approach
# downloaded the existing file only to overwrite it inside the local Hub cache,
# which corrupted the cached entry, required a machine-specific cache
# directory, and failed when the file did not exist in the repository yet.
payload = json.dumps(all_data, sort_keys=True, indent=4).encode("utf-8")

api.upload_file(
    path_or_fileobj=payload,
    path_in_repo=file_name,
    repo_id=dataset_repo_id,
    repo_type="dataset",
)