Spaces:
Running
Running
| import csv | |
| import os | |
| import shutil | |
| import json | |
| import yaml | |
| from typing import Any, List, Tuple, Union | |
| from copy import deepcopy | |
| from .log import logger | |
| from .others import get_cur_time_str | |
| from .file import ensure_dir | |
class CSVDataRecord:
    """Collect rows of data into a CSV file without clobbering earlier results.

    Before the target file is (re)created, an already existing file with the
    same name is copied to a backup, to avoid DATA LOSS such as:

    ```
    # data loss: all content in ./a-file-contains-important-data.csv is
    # discarded and unrecoverable as soon as the file is opened with 'w':
    with open('./a-file-contains-important-data.csv', 'w') as f:
        # do sth.
    ```

    Consider this scenario:
    - The code above sits at the top of an experiment script,
    - the experiment has finished and its data was collected into the CSV file,
    - and the script is accidentally run again: all valuable data is lost.

    :class:`CSVDataRecord` guards against that by backing the old file up first.
    """

    def __init__(self, file_path: str, header: List[str], backup=True):
        """Create (or overwrite) the file and write the CSV header into it.

        Args:
            file_path (str): Target CSV file path.
            header (List[str]): CSV header, like `['name', 'age', 'sex', ...]`.
            backup (bool, optional): If True, an existing file at
                :attr:`file_path` is copied to
                `file_path + '.' + get_cur_time_str()` before being
                overwritten. Defaults to True.
        """
        self.file_path = file_path
        self.header = header

        if backup and os.path.exists(file_path):
            backup_file_path = '{}.{}'.format(file_path, get_cur_time_str())
            shutil.copyfile(file_path, backup_file_path)
            # `Logger.warn` is a deprecated alias (removed in Python 3.13).
            logger.warning('csv file already exists! backup raw file to {}'.format(backup_file_path))

        # NOTE(review): presumably creates the parent directory of
        # `file_path` -- confirm against `.file.ensure_dir`.
        ensure_dir(file_path)

        # The csv module requires newline='' so that it controls the line
        # endings itself; otherwise rows end in '\r\r\n' on Windows.
        with open(file_path, 'w', newline='') as f:
            csv.writer(f).writerow(header)

    def write(self, data: Union[List[Any], Tuple[Any]]):
        """Append one row of data to the file at :attr:`file_path`.

        Args:
            data (Union[List[Any], Tuple[Any]]): A row of data, like
                `('ekko', 18, 'man')`. Must have exactly one value per
                header column.

        Raises:
            AssertionError: If `len(data)` differs from `len(self.header)`.
        """
        # Raised explicitly (instead of a bare `assert`) so the check is not
        # stripped under `python -O`; the exception type is unchanged.
        if len(data) != len(self.header):
            raise AssertionError(
                'row has {} values but header has {} columns'.format(len(data), len(self.header))
            )

        with open(self.file_path, 'a', newline='') as f:
            csv.writer(f).writerow(data)
def write_json(file_path: str, obj: Any, indent=2, backup=True, ensure_obj_serializable=False):
    """Write data into a JSON file.

    An already existing file with the same name is backed up first to avoid
    DATA LOSS (see :class:`CSVDataRecord`).

    Args:
        file_path (str): Target JSON file path.
        obj (Any): Collected data which can be serialized into JSON format.
        indent (int, optional): Indentation used to keep the output readable.
            Defaults to 2.
        backup (bool, optional): If True, an existing file at
            :attr:`file_path` is copied to
            `file_path + '.' + get_cur_time_str()` before being overwritten.
            Defaults to True.
        ensure_obj_serializable (bool, optional): If True, a deep copy of
            `obj` is passed through :func:`make_obj_json_serializable` before
            dumping, so the caller's object is left unmodified.
            Defaults to False.
    """
    if backup and os.path.exists(file_path):
        backup_file_path = '{}.{}'.format(file_path, get_cur_time_str())
        shutil.copyfile(file_path, backup_file_path)
        # `Logger.warn` is a deprecated alias (removed in Python 3.13).
        logger.warning('json file already exists! backup raw file to {}'.format(backup_file_path))

    # NOTE(review): presumably creates the parent directory of `file_path`
    # -- confirm against `.file.ensure_dir`.
    ensure_dir(file_path)

    if ensure_obj_serializable:
        # Convert a copy: make_obj_json_serializable mutates its argument.
        obj = deepcopy(obj)
        make_obj_json_serializable(obj)

    # Serialize BEFORE opening the target: open(..., 'w') truncates the file
    # immediately, so a failing json.dumps would otherwise wipe existing
    # content (unrecoverably when backup=False).
    obj_str = json.dumps(obj, indent=indent, ensure_ascii=False)

    with open(file_path, 'w', encoding='utf8') as f:
        f.write(obj_str)
def read_json(file_path: str):
    """Load a JSON document from disk and decode it.

    Args:
        file_path (str): Path of the JSON file to read; it is decoded as UTF-8.

    Returns:
        Any: The Python object decoded from the file's JSON content.
    """
    with open(file_path, mode='r', encoding='utf8') as json_file:
        raw_text = json_file.read()

    parsed = json.loads(raw_text)
    return parsed
def read_yaml(file_path: str):
    """Read YAML file.

    Args:
        file_path (str): Target YAML file path. The file is decoded as UTF-8.

    Returns:
        Any: The object parsed from the target file.
    """
    # Decode explicitly as UTF-8 (as read_json does) instead of relying on the
    # platform's locale encoding, which breaks non-ASCII YAML on some systems.
    with open(file_path, 'r', encoding='utf8') as f:
        # SECURITY: yaml.Loader can construct arbitrary Python objects from
        # tagged YAML. Only use this on trusted files; switch to
        # yaml.safe_load if the input may come from an untrusted source.
        return yaml.load(f, yaml.Loader)
| import inspect | |
| import torch | |
def make_obj_json_serializable(obj):
    """Replace values that JSON cannot encode with string stand-ins, in place.

    Walks the items of `obj` and of any nested dict values:

    - a callable value is replaced by its source code
      (:func:`inspect.getsource`), or by `str(value)` when no source can be
      retrieved;
    - a :class:`torch.Tensor` value is replaced by `str(value)`;
    - every other value is left untouched (lists/tuples are not traversed).

    Args:
        obj (dict): Mapping to convert. It is MUTATED; pass a copy if the
            original must be preserved.

    Returns:
        dict: The same `obj` instance, after conversion.
    """
    for k, v in obj.items():
        if isinstance(v, dict):
            # Nested dicts are converted in place; the call returns `v` itself.
            obj[k] = make_obj_json_serializable(v)
        elif callable(v):
            try:
                obj[k] = inspect.getsource(v)
            except (OSError, TypeError):
                # getsource raises TypeError for builtins and callable
                # instances, and OSError when the source file cannot be
                # found; fall back to the textual representation instead of
                # aborting the whole conversion.
                obj[k] = str(v)
        elif isinstance(v, torch.Tensor):
            obj[k] = str(v)
    return obj