| import os | |
| import json | |
| import pytz | |
| import logging | |
| import asyncio | |
| from datetime import datetime | |
| from pathlib import Path | |
| import huggingface_hub | |
| from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError | |
| from dotenv import load_dotenv | |
| from git import Repo | |
| from datetime import datetime | |
| from tqdm.auto import tqdm | |
| from tqdm.contrib.logging import logging_redirect_tqdm | |
| from app.config.hf_config import HF_TOKEN, QUEUE_REPO, API, EVAL_REQUESTS_PATH | |
| from app.utils.model_validation import ModelValidator | |
# Silence the Hub client's own logging and progress bars so that only this
# script's tqdm output reaches the console.
huggingface_hub.logging.set_verbosity_error()
huggingface_hub.utils.disable_progress_bars()

# Root logger: errors only, printed as the bare message.
logging.basicConfig(format="%(message)s", level=logging.ERROR)
logger = logging.getLogger(__name__)

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Single validator instance reused by main() for every request file.
validator = ModelValidator()
def get_changed_files(repo_path, start_date, end_date):
    """Collect the paths touched by commits dated between two days.

    Walks the history returned by ``Repo(repo_path).iter_commits()`` and gathers
    the path of every file that differs between a commit and its first parent.
    A root commit (one without parents) contributes every file in its tree.

    Args:
        repo_path: Location of the local git checkout to inspect.
        start_date: Lower bound as a ``YYYY-MM-DD`` string, inclusive from 00:00.
        end_date: Upper bound as a ``YYYY-MM-DD`` string. It is parsed as 00:00
            of that day, so commits made later on ``end_date`` are not included.

    Returns:
        A set of path strings as reported by the diff.

    Raises:
        ValueError: If either date does not match ``%Y-%m-%d``.
    """
    repo = Repo(repo_path)
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    # NOTE(review): `end` is midnight at the *start* of end_date; confirm whether
    # the whole end day was meant to be covered.

    changed_files = set()
    pbar = tqdm(repo.iter_commits(), desc=f"Reading commits from {end_date} to {start_date}")
    for commit in pbar:
        # Naive local-time datetime, comparable with the naive bounds above.
        commit_date = datetime.fromtimestamp(commit.committed_date)
        pbar.set_postfix_str(f"Commit date: {commit_date}")

        # Stop at the first commit older than the window; this relies on the
        # history being yielded newest-first (presumably the default order).
        if commit_date < start:
            break
        if commit_date > end:
            continue

        if commit.parents:
            paths = (item.a_path for item in commit.diff(commit.parents[0]))
        else:
            # Root commit: there is no parent to diff against, so every blob in
            # its tree counts as changed (previously this raised IndexError).
            paths = (
                entry.path
                for entry in commit.tree.traverse()
                if entry.type == "blob"
            )
        # Drop empty/None paths so callers never build a ".../None" filename.
        changed_files.update(path for path in paths if path)

    return changed_files
def read_json(repo_path, file):
    """Load and return the JSON document stored at ``{repo_path}/{file}``.

    Args:
        repo_path: Directory the file path is relative to.
        file: Path of the JSON file, relative to ``repo_path``.

    Returns:
        The decoded JSON value.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's locale
    # encoding. A separate name for the handle avoids shadowing `file`.
    with open(f"{repo_path}/{file}", encoding="utf-8") as handle:
        return json.load(handle)
def write_json(repo_path, file, content):
    """Serialize ``content`` as 2-space-indented JSON to ``{repo_path}/{file}``.

    Args:
        repo_path: Directory the file path is relative to.
        file: Path of the target file, relative to ``repo_path``.
        content: JSON-serializable value to store.

    Raises:
        TypeError: If ``content`` holds a value ``json`` cannot serialize. The
            target file is left untouched in that case.
    """
    # Serialize before opening: opening with "w" truncates immediately, so a
    # failure halfway through json.dump would destroy the existing file.
    payload = json.dumps(content, indent=2)
    with open(f"{repo_path}/{file}", "w", encoding="utf-8") as handle:
        handle.write(payload)
def main(
    requests_path="/Users/lozowski/Developer/requests",
    start_date="2024-12-09",
    end_date="2025-01-07",
):
    """Recompute the ``params`` field of recently changed request files.

    For every file reported by ``get_changed_files`` the request JSON is loaded,
    the model is looked up on the Hub, and ``validator.get_model_size`` is asked
    for a fresh size. When that size differs from the stored ``params`` value
    the request file is rewritten in place.

    Files that are missing or unreadable, models or revisions that are not
    found, and size computations that report an error are skipped with a
    message.

    Args:
        requests_path: Local checkout of the requests repository.
        start_date: First day of the commit window, ``YYYY-MM-DD``.
        end_date: End of the commit window, ``YYYY-MM-DD``.
    """
    changed_files = get_changed_files(requests_path, start_date, end_date)

    for file in tqdm(changed_files):
        try:
            request_data = read_json(requests_path, file)
        except FileNotFoundError:
            tqdm.write(f"File {file} not found")
            continue
        except (json.JSONDecodeError, UnicodeDecodeError):
            # The diff can contain non-request files; skip them instead of
            # aborting the whole run.
            tqdm.write(f"File {file} is not valid JSON")
            continue

        # Bound once so the messages below need no nested double quotes inside
        # f-strings, which is a SyntaxError before Python 3.12.
        model = request_data["model"]

        try:
            model_info = API.model_info(
                repo_id=model,
                revision=request_data["revision"],
                token=HF_TOKEN,
            )
        except (RepositoryNotFoundError, RevisionNotFoundError):
            tqdm.write(f"Model info for {model} not found")
            continue

        # Route log records emitted during the size computation through tqdm so
        # they do not break the progress bar.
        with logging_redirect_tqdm():
            new_model_size, error = asyncio.run(
                validator.get_model_size(
                    model_info=model_info,
                    precision=request_data["precision"],
                    base_model=request_data["base_model"],
                    revision=request_data["revision"],
                )
            )

        if error:
            tqdm.write(f"Error getting model size info for {model}, {error}")
            continue

        old_model_size = request_data["params"]
        if old_model_size == new_model_size:
            continue

        # Purely informational: unusually large results are flagged but are
        # still written back below.
        if new_model_size > 100:
            tqdm.write(f"Model: {model}, size is more 100B: {new_model_size}")

        tqdm.write(f"Model: {model}, old size: {old_model_size} new size: {new_model_size}")
        tqdm.write(f"Updating request file {file}")

        request_data["params"] = new_model_size
        write_json(requests_path, file, content=request_data)
# Run the size-correction pass only when executed as a script, not on import.
if __name__ == "__main__":
    main()