ThomasSimonini committed
Commit 788d88f · 1 Parent(s): 99140f2

Create app.py

Files changed (1)
app.py +190 -0
app.py ADDED
@@ -0,0 +1,190 @@
+ import gradio as gr
+ import pandas as pd
+ import requests
+ from huggingface_hub import HfApi, hf_hub_download
+ from huggingface_hub.repocard import metadata_load
+
+ api = HfApi()
+
+ def get_user_models(hf_username, env_tag, lib_tag):
+     """
+     List the Reinforcement Learning models
+     of a user, given an environment and a library
+     :param hf_username: user's HF username
+     :param env_tag: environment tag
+     :param lib_tag: library tag
+     """
+     models = api.list_models(author=hf_username, filter=["reinforcement-learning", env_tag, lib_tag])
+
+     user_model_ids = [x.modelId for x in models]
+     return user_model_ids
+
+
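+ # For example (hypothetical output, assuming the user pushed a matching model):
+ # get_user_models("ThomasSimonini", "LunarLander-v2", "stable-baselines3")
+ # -> ["ThomasSimonini/ppo-LunarLander-v2"]
+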
+ def get_metadata(model_id):
+     """
+     Get model metadata (contains the evaluation data)
+     :param model_id: ID of the model repo
+     """
+     try:
+         readme_path = hf_hub_download(model_id, filename="README.md")
+         return metadata_load(readme_path)
+     except requests.exceptions.HTTPError:
+         # 404: README.md not found
+         return None
+
+
+ def parse_metrics_accuracy(meta):
+     """
+     Get the model's reported results and parse them
+     :param meta: model metadata
+     """
+     if "model-index" not in meta:
+         return None
+     result = meta["model-index"][0]["results"]
+     metrics = result[0]["metrics"]
+     accuracy = metrics[0]["value"]
+
+     return accuracy
+
+
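+ # For reference, the "model-index" metadata read above typically looks roughly
+ # like this in the model card (hypothetical values):
+ # model-index:
+ #   - results:
+ #       - metrics:
+ #           - type: mean_reward
+ #             value: 250.50 +/- 17.30
+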
+ def parse_rewards(accuracy):
+     """
+     Parse mean_reward and std_reward
+     :param accuracy: model results, usually formatted as "mean_reward +/- std_reward"
+     """
+     default_mean_reward = -1000
+     default_std_reward = -1000
+     if accuracy is not None:
+         accuracy = str(accuracy)
+         parsed = accuracy.split(" +/- ")
+         if len(parsed) > 1:
+             mean_reward = float(parsed[0])
+             std_reward = float(parsed[1])
+         elif len(parsed) == 1:  # only mean reward
+             mean_reward = float(parsed[0])
+             std_reward = 0.0
+         else:
+             mean_reward = float(default_mean_reward)
+             std_reward = float(default_std_reward)
+     else:
+         mean_reward = float(default_mean_reward)
+         std_reward = float(default_std_reward)
+
+     return mean_reward, std_reward
+
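+ # For example (hypothetical values):
+ # parse_rewards("250.50 +/- 17.30") -> (250.5, 17.3)
+ # parse_rewards(None)               -> (-1000.0, -1000.0)
+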
+ def calculate_best_result(user_model_ids):
+     """
+     Calculate the best result of a unit
+     best_result = mean_reward - std_reward
+     :param user_model_ids: RL models of a user
+     """
+     best_result = -100
+     best_model_id = ""
+     for model in user_model_ids:
+         meta = get_metadata(model)
+         if meta is None:
+             continue
+         accuracy = parse_metrics_accuracy(meta)
+         mean_reward, std_reward = parse_rewards(accuracy)
+         result = mean_reward - std_reward
+         if result > best_result:
+             best_result = result
+             best_model_id = model
+
+     return best_result, best_model_id
+
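+ # For example (hypothetical): a model reporting "250.50 +/- 17.30" scores
+ # 250.5 - 17.3 = 233.2; the model with the highest such score is kept.
+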
+ def check_if_passed(model):
+     """
+     Check if result >= baseline
+     to know if you pass
+     :param model: user model
+     """
+     if model["best_result"] >= model["min_result"]:
+         model["passed"] = True
+
+ def test_(hf_username):
+     results_certification = [
+         {
+             "unit": "Unit 1: Introduction to Deep Reinforcement Learning",
+             "env": "LunarLander-v2",
+             "library": "stable-baselines3",
+             "min_result": 200,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Bonus Unit 1: Introduction to Deep RL with Huggy",
+             "env": "ML-Agents-Huggy",
+             "library": "ml-agents",
+             "min_result": -100,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Introduction to Q-Learning",
+             "env": "Taxi-v3",
+             "library": "q-learning",
+             "min_result": 5,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Introduction to Deep Q-Learning",
+             "env": "SpaceInvadersNoFrameskip-v4",
+             "library": "stable-baselines3",
+             "min_result": 200,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Unit 4: Policy Gradient with PyTorch, 1/2",
+             "env": "CartPole-v1",
+             "library": "reinforce",
+             "min_result": 350,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+         {
+             "unit": "Unit 4: Policy Gradient with PyTorch, 2/2",
+             "env": "Pixelcopter-PLE-v0",
+             "library": "reinforce",
+             "min_result": 5,
+             "best_result": 0,
+             "best_model_id": "",
+             "passed": False
+         },
+     ]
+     for unit in results_certification:
+         # Get the user's models for this unit's environment and library
+         user_models = get_user_models(hf_username, unit["env"], unit["library"])
+         print(user_models)
+         # Calculate the best result and get the best_model_id
+         best_result, best_model_id = calculate_best_result(user_models)
+
+         # Save best_result and best_model_id
+         unit["best_result"] = best_result
+         unit["best_model_id"] = best_model_id
+
+         # Based on best_result, check whether the unit is passed
+         check_if_passed(unit)
+
+     print(results_certification)
+
+     df = pd.DataFrame(results_certification)
+
+     return df
+
+
+ with gr.Blocks() as demo:
+     hf_username = gr.Textbox(placeholder="ThomasSimonini", label="Your Hugging Face Username")
+     email = gr.Textbox(placeholder="[email protected]", label="Your Email (to receive your certificate)")
+     check_progress_button = gr.Button(value="Check my progress")
+     output = gr.components.Dataframe(headers=["Unit", "Environment", "Library", "Baseline", "Your best result", "Your best model id", "Pass?"], datatype=["markdown", "markdown", "markdown", "number", "number", "markdown", "bool"])
+
+     check_progress_button.click(fn=test_, inputs=hf_username, outputs=output)
+
+ demo.launch()
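
To try this Space locally, something like the following should work (a sketch; the package list is inferred from the imports above and is not part of this commit):

pip install gradio huggingface_hub pandas requests
python app.py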