ThomasSimonini committed
Commit 788d88f · 1 Parent(s): 99140f2

Create app.py

Files changed (1)
app.py +190 -0
app.py ADDED
@@ -0,0 +1,190 @@
+ import gradio as gr
+ import pandas as pd
+ import requests
+ from huggingface_hub import HfApi, hf_hub_download
+ from huggingface_hub.repocard import metadata_load
+
+ api = HfApi()
+
+ def get_user_models(hf_username, env_tag, lib_tag):
+     """
+     List the Reinforcement Learning models
+     of a user, given an environment and a library
+     :param hf_username: user's HF username
+     :param env_tag: environment tag
+     :param lib_tag: library tag
+     """
+     models = api.list_models(author=hf_username, filter=["reinforcement-learning", env_tag, lib_tag])
+
+     user_model_ids = [x.modelId for x in models]
+     return user_model_ids
+
+
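+ # For example (hypothetical output, assuming the user pushed a matching model):
+ # get_user_models("ThomasSimonini", "LunarLander-v2", "stable-baselines3")
+ # -> ["ThomasSimonini/ppo-LunarLander-v2"]
+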
+ def get_metadata(model_id):
+     """
+     Get model metadata (contains the evaluation data)
+     :param model_id: ID of the model repo
+     """
+     try:
+         readme_path = hf_hub_download(model_id, filename="README.md")
+         return metadata_load(readme_path)
+     except requests.exceptions.HTTPError:
+         # 404: README.md not found
+         return None
+
+
+ def parse_metrics_accuracy(meta):
+     """
+     Get the model's reported results and parse them
+     :param meta: model metadata
+     """
+     if "model-index" not in meta:
+         return None
+     result = meta["model-index"][0]["results"]
+     metrics = result[0]["metrics"]
+     accuracy = metrics[0]["value"]
+
+     return accuracy
+
+
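+ # For reference, the "model-index" metadata read above typically looks roughly
+ # like this in the model card (hypothetical values):
+ # model-index:
+ #   - results:
+ #       - metrics:
+ #           - type: mean_reward
+ #             value: 250.50 +/- 17.30
+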
+ def parse_rewards(accuracy):
+     """
+     Parse mean_reward and std_reward
+     :param accuracy: model results, usually formatted as "mean_reward +/- std_reward"
+     """
+     default_mean_reward = -1000
+     default_std_reward = -1000
+     if accuracy is not None:
+         accuracy = str(accuracy)
+         parsed = accuracy.split(" +/- ")
+         if len(parsed) > 1:
+             mean_reward = float(parsed[0])
+             std_reward = float(parsed[1])
+         elif len(parsed) == 1:  # only mean reward
+             mean_reward = float(parsed[0])
+             std_reward = 0.0
+         else:
+             mean_reward = float(default_mean_reward)
+             std_reward = float(default_std_reward)
+     else:
+         mean_reward = float(default_mean_reward)
+         std_reward = float(default_std_reward)
+
+     return mean_reward, std_reward
+
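+ # For example (hypothetical values):
+ # parse_rewards("250.50 +/- 17.30") -> (250.5, 17.3)
+ # parse_rewards(None)               -> (-1000.0, -1000.0)
+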
+ def calculate_best_result(user_model_ids):
+     """
+     Calculate the best result of a unit
+     best_result = mean_reward - std_reward
+     :param user_model_ids: RL models of a user
+     """
+     best_result = -100
+     best_model_id = ""
+     for model in user_model_ids:
+         meta = get_metadata(model)
+         if meta is None:
+             continue
+         accuracy = parse_metrics_accuracy(meta)
+         mean_reward, std_reward = parse_rewards(accuracy)
+         result = mean_reward - std_reward
+         if result > best_result:
+             best_result = result
+             best_model_id = model
+
+     return best_result, best_model_id
+
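+ # For example (hypothetical): a model reporting "250.50 +/- 17.30" scores
+ # 250.5 - 17.3 = 233.2; the model with the highest such score is kept.
+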
+ def check_if_passed(model):
+     """
+     Check if result >= baseline
+     to know if you pass
+     :param model: user model
+     """
+     if model["best_result"] >= model["min_result"]:
+         model["passed"] = True
+
+ def test_(hf_username):
+     results_certification = [
+         {
+             "unit": "Unit 1: Introduction to Deep Reinforcement Learning",
+             "env": "LunarLander-v2",
+             "library": "stable-baselines3",
+             "min_result": 200,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Bonus Unit 1: Introduction to Deep RL with Huggy",
+             "env": "ML-Agents-Huggy",
+             "library": "ml-agents",
+             "min_result": -100,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Introduction to Q-Learning",
+             "env": "Taxi-v3",
+             "library": "q-learning",
+             "min_result": 5,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Introduction to Deep Q-Learning",
+             "env": "SpaceInvadersNoFrameskip-v4",
+             "library": "stable-baselines3",
+             "min_result": 200,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Unit 4: Policy Gradient with PyTorch, 1/2",
+             "env": "CartPole-v1",
+             "library": "reinforce",
+             "min_result": 350,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Unit 4: Policy Gradient with PyTorch, 2/2",
+             "env": "Pixelcopter-PLE-v0",
+             "library": "reinforce",
+             "min_result": 5,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+     ]
+     for unit in results_certification:
+         # Get the user's models for this unit's environment and library
+         user_models = get_user_models(hf_username, unit["env"], unit["library"])
+         print(user_models)
+         # Calculate the best result and get the best_model_id
+         best_result, best_model_id = calculate_best_result(user_models)
+
+         # Save best_result and best_model_id
+         unit["best_result"] = best_result
+         unit["best_model_id"] = best_model_id
+
+         # Based on best_result, check whether the unit is passed
+         check_if_passed(unit)
+
+     print(results_certification)
+
+     df = pd.DataFrame(results_certification)
+
+     return df
+
+
+ with gr.Blocks() as demo:
+     hf_username = gr.Textbox(placeholder="ThomasSimonini", label="Your Hugging Face Username")
+     email = gr.Textbox(placeholder="[email protected]", label="Your Email (to receive your certificate)")
+     check_progress_button = gr.Button(value="Check my progress")
+     output = gr.components.Dataframe(headers=["Unit", "Environment", "Library", "Baseline", "Your best result", "Your best model id", "Pass?"], datatype=["markdown", "markdown", "markdown", "number", "number", "markdown", "bool"])
+
+     check_progress_button.click(fn=test_, inputs=hf_username, outputs=output)
+
+ demo.launch()
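
To try this Space locally, something like the following should work (a sketch; the package list is inferred from the imports above and is not part of this commit):

pip install gradio huggingface_hub pandas requests
python app.py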