"""
Module which updates any of the issues to reflect changes in the issue state.

It asks the GitHub API for every issue updated since the most recent
``updated_at`` value recorded locally, merges the results into the JSON-lines
issue file and records which issues changed.
"""
import argparse
import json
import logging
import os

import numpy as np
import requests

from defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

JSON_FILE = "issues.json"
# Mapping of already-known issues; only its "updated_at" values are read here.
ISSUES_DICT_FILE = "issues_dict.json"
# Record of the issues touched by the last run (consumed by the embedding update).
UPDATED_ISSUES_FILE = "updated_issues.json"
# Number of issues requested per page.
PER_PAGE = 100


def _fetch_issues_since(url, headers, since, n_pages=-1, per_page=PER_PAGE):
    """Return the issues the API lists for ``since``, following pagination."""
    page = 1
    query_params = {
        "state": "all",
        "since": since,
        "sort": "created",
        "direction": "asc",
        # Must be sent explicitly: without it the API uses its own, smaller
        # default page size, so the "short page" test below would stop the
        # loop after the very first page.
        "per_page": per_page,
        "page": page,
    }
    # A non-positive n_pages means "no limit".
    page_limit = (n_pages + page) if n_pages > 0 else np.inf

    fetched = []
    while page < page_limit:
        query_params["page"] = page
        response = requests.get(url, headers=headers, params=query_params)
        if response.status_code != 200:
            raise ValueError(
                f"Request failed with status code {response.status_code} and message {response.text}"
            )

        batch = response.json()
        logger.info("Page: %s, number of issues: %s", page, len(batch))

        # An empty page means we've reached the end of the issues
        if not batch:
            break
        fetched.extend(batch)

        # A short page is the last page
        if len(batch) < per_page:
            break
        page += 1
    return fetched


def _merge_issues(input_filename, tmp_filename, fetched_by_number):
    """Copy the input lines to the temp file, swapping in fetched issues, then append new ones."""
    replaced = set()
    with open(input_filename, "r", encoding="utf-8") as src:
        # "w" rather than "a": a temp file left behind by a crashed run must
        # not leak into the new output.
        with open(tmp_filename, "w", encoding="utf-8") as dst:
            for line in src:
                if not line.strip():
                    # Blank lines carry no issue and cannot be parsed
                    continue
                number = json.loads(line)["number"]
                if number in fetched_by_number:
                    dst.write(json.dumps(fetched_by_number[number]))
                    dst.write("\n")
                    replaced.add(number)
                else:
                    # Keep the line as-is, but make sure it is terminated so the
                    # next record does not end up glued to it.
                    dst.write(line if line.endswith("\n") else line + "\n")

            # Append the issues that were not already in the file. Iterating
            # the map (not the raw list) writes each issue number only once.
            for number, issue in fetched_by_number.items():
                if number not in replaced:
                    dst.write(json.dumps(issue))
                    dst.write("\n")


def update_issues(
    input_filename=JSON_FILE,
    output_filename=JSON_FILE,
    github_api_version=GITHUB_API_VERSION,
    owner=OWNER,
    repo=REPO,
    token=TOKEN,
    n_pages=-1,
):
    """
    Refresh the local JSON-lines issue file with the issues that changed on GitHub.

    The most recent ``updated_at`` value found in ``issues_dict.json`` is used
    as the ``since`` filter of the request. Issues already present in
    ``input_filename`` are replaced by their fetched version, issues that are
    not present are appended, and the result replaces ``output_filename``.
    The fetched issues, keyed by number, are also written to
    ``updated_issues.json``.

    Args:
        input_filename: JSON-lines file holding one issue per line.
        output_filename: File to overwrite with the merged issues. It must
            already exist.
        github_api_version: Value sent in the ``X-GitHub-Api-Version`` header.
        owner: Owner of the repository.
        repo: Name of the repository.
        token: Value sent verbatim in the ``Authorization`` header.
        n_pages: Maximum number of pages to request; any value <= 0 means no
            limit.

    Returns:
        ``output_filename``.

    Raises:
        ValueError: If ``issues_dict.json`` contains no issues, if
            ``output_filename`` does not exist, or if a request does not
            return status code 200.
    """
    with open(ISSUES_DICT_FILE, "r", encoding="utf-8") as f:
        issues = json.load(f)

    if not issues:
        raise ValueError(
            f"File {ISSUES_DICT_FILE} contains no issues, cannot determine the last update time"
        )

    # Get most recent updated at information
    most_recent = max(issue["updated_at"] for issue in issues.values())

    # This function only updates an existing file, it never creates one
    if not os.path.exists(output_filename):
        raise ValueError(f"File {output_filename} does not exist")

    # Define the URL and headers
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {
        "Accept": "application/vnd.github+json",
        # NOTE(review): the token is sent without a scheme prefix; presumably
        # TOKEN in defaults already includes it ("Bearer ...") - confirm.
        "Authorization": f"{token}",
        "X-GitHub-Api-Version": f"{github_api_version}",
        "User-Agent": "amyeroberts",
    }

    new_lines = _fetch_issues_since(url, headers, most_recent, n_pages=n_pages)
    issue_lines_map = {issue["number"]: issue for issue in new_lines}

    # Put the temp file next to the output file, prefixing only the basename so
    # that output paths with a directory component keep working.
    tmp_filename = os.path.join(
        os.path.dirname(output_filename),
        "tmp_" + os.path.basename(output_filename),
    )
    _merge_issues(input_filename, tmp_filename, issue_lines_map)

    # Overwrite the old file with the new file. os.replace also overwrites an
    # existing destination on Windows, where os.rename would fail.
    os.replace(tmp_filename, output_filename)

    # Save a record of the updated issues for the embedding update
    with open(UPDATED_ISSUES_FILE, "w", encoding="utf-8") as f:
        json.dump(issue_lines_map, f, indent=4, sort_keys=True)

    return output_filename


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_filename", type=str, default=JSON_FILE)
    parser.add_argument("--output_filename", type=str, default=JSON_FILE)
    parser.add_argument("--github_api_version", type=str, default=GITHUB_API_VERSION)
    parser.add_argument("--owner", type=str, default=OWNER)
    parser.add_argument("--repo", type=str, default=REPO)
    parser.add_argument("--token", type=str, default=TOKEN)
    parser.add_argument("--n_pages", type=int, default=-1)
    args = parser.parse_args()
    update_issues(**vars(args))