import pyarrow as pa
import pyarrow.parquet as pq
import json
import tempfile

# current schema (refer to https://huggingface.co/spaces/phxia/dataset-builder/blob/main/dataset_uploader.py#L153 for more info)
schema = {
    "username": {"_type": "Value", "dtype": "string"},
    "unit1": {"_type": "Value", "dtype": "float64"},
    "unit2": {"_type": "Value", "dtype": "float64"},
    "unit3": {"_type": "Value", "dtype": "float64"},
    "unit4": {"_type": "Value", "dtype": "float64"},
    "certified": {"_type": "Value", "dtype": "int64"},
}


def to_parquet(
    api,
    repo: str,
    username: str = "",
    unit1: float = 0.0,
    unit2: float = 0.0,
    unit3: float = 0.0,
    unit4: float = 0.0,
    certified: int = 0,
):
    # Scores arrive in [0, 1]; store them as percentages
    data = {
        "username": username,
        "unit1": unit1 * 100 if unit1 != 0 else 0.0,
        "unit2": unit2 * 100 if unit2 != 0 else 0.0,
        "unit3": unit3 * 100 if unit3 != 0 else 0.0,
        "unit4": unit4 * 100 if unit4 != 0 else 0.0,
        "certified": certified,
    }

    # Export data to Arrow format
    table = pa.Table.from_pylist([data])

    # Add metadata (used by datasets library)
    table = table.replace_schema_metadata(
        {"huggingface": json.dumps({"info": {"features": schema}})}
    )

    # Write to parquet file
    archive_file = tempfile.NamedTemporaryFile(delete=False)
    pq.write_table(table, archive_file.name)
    archive_file.close()

    api.upload_file(
        repo_id=repo,  # manually created repo
        repo_type="dataset",
        path_in_repo=f"{username}.parquet",  # each user will have their own parquet
        path_or_fileobj=archive_file.name,
    )
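

# --- Usage sketch (an assumption, not part of the original snippet) ---
# The repo id and score values below are hypothetical placeholders; HfApi and
# upload_file are the huggingface_hub APIs the function above relies on.
if __name__ == "__main__":
    from huggingface_hub import HfApi

    api = HfApi()  # uses the token from `huggingface-cli login` or the HF_TOKEN env var
    to_parquet(
        api,
        repo="your-org/your-results-dataset",  # hypothetical, manually created dataset repo
        username="alice",
        unit1=0.95,  # raw score in [0, 1]; stored as 95.0 in the parquet file
        certified=1,
    )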