# Hugging Face Space: OpenVINO/nncf-quantization (status: Running)
# File size: 6,750 Bytes

import os
import shutil
import gradio as gr
from huggingface_hub import HfApi, whoami, ModelCard
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from textwrap import dedent


from tempfile import TemporaryDirectory

from huggingface_hub.file_download import repo_folder_name
from optimum.exporters.tasks import TasksManager
from optimum.intel.utils.constant import _TASK_ALIASES
from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
from optimum.exporters import TasksManager

from optimum.intel.utils.modeling_utils import _find_files_matching_pattern
from optimum.intel import (
    OVModelForAudioClassification,
    OVModelForCausalLM,
    OVModelForFeatureExtraction,
    OVModelForImageClassification,
    OVModelForMaskedLM,
    OVModelForQuestionAnswering,
    OVModelForSeq2SeqLM,
    OVModelForSequenceClassification,
    OVModelForTokenClassification,
    OVStableDiffusionPipeline,
    OVStableDiffusionXLPipeline,
    OVLatentConsistencyModelPipeline,
    OVModelForPix2Struct,
    OVWeightQuantizationConfig,
)

# Token read from the HF_TOKEN environment variable; None when the variable is unset.
# NOTE(review): not referenced anywhere in the visible code, which relies on the
# per-user OAuth token passed to process_model instead — confirm it is still needed.
HF_TOKEN = os.environ.get("HF_TOKEN")


def process_model(
    model_id: str,
    dtype: str,
    private_repo: bool,
    task: str,
    oauth_token: gr.OAuthToken,
):
    """Quantize a Hub model with OpenVINO/NNCF and push it to the user's namespace.

    Args:
        model_id: Hub repository id of the model to quantize (e.g. ``"org/name"``).
        dtype: Target weight precision; ``"int8"`` selects 8-bit weights, any
            other value selects 4-bit weights.
        private_repo: Whether the destination repository is created as private.
        task: Task name or synonym, or ``"auto"`` to infer it from the model.
        oauth_token: OAuth token of the logged-in user; used for every Hub call.

    Returns:
        An HTML snippet reporting success and linking to the new repository.

    Raises:
        ValueError: If the user is not logged in, no model was selected, the task
            cannot be inferred or is unsupported, or no OpenVINO model class can
            be found for the task.
        Exception: If uploading one of the exported files fails.
    """
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use this space")
    if not model_id:
        raise ValueError("Please select a model to quantize")

    model_name = model_id.split("/")[-1]
    username = whoami(oauth_token.token)["name"]
    new_repo_id = f"{username}/{model_name}-openvino-{dtype}"

    task = _resolve_task(task, model_id)
    auto_model_class = _HEAD_TO_AUTOMODELS[task]

    # Resolve the class by attribute lookup instead of eval(): no string is
    # evaluated as code, and classes that are not imported at the top of this
    # file can still be found.
    import importlib

    model_class = getattr(importlib.import_module("optimum.intel"), auto_model_class, None)
    if model_class is None:
        raise ValueError(
            f"No OpenVINO model class named '{auto_model_class}' is available for the task '{task}'"
        )

    # If the source repo already ships an OpenVINO IR there is nothing to
    # export; the model is only (re-)quantized.
    pattern = r"(.*)?openvino(.*)?\_model.xml"
    ov_files = _find_files_matching_pattern(
        model_id, pattern, use_auth_token=oauth_token.token
    )
    export = len(ov_files) == 0
    quantization_config = OVWeightQuantizationConfig(bits=8 if dtype == "int8" else 4)
    api = HfApi(token=oauth_token.token)

    # TemporaryDirectory removes everything below ``d`` on exit (including on
    # error), so no extra cleanup of ``folder`` is required.
    with TemporaryDirectory() as d:
        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
        os.makedirs(folder)

        # Bring along the JSON files of the source repo next to the exported model.
        api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])

        # NOTE(review): the user token is not forwarded to from_pretrained, so
        # private/gated source models may fail to load here — confirm the right
        # keyword for the installed optimum-intel version before adding it.
        ov_model = model_class.from_pretrained(
            model_id, export=export, quantization_config=quantization_config
        )
        ov_model.save_pretrained(folder)

        new_repo_url = api.create_repo(
            repo_id=new_repo_id, exist_ok=True, private=private_repo
        )
        new_repo_id = new_repo_url.repo_id
        print("Repo created successfully!", new_repo_url)

        _upload_tree(api, folder, new_repo_id)

        # Start from the original model card when it can be loaded; any failure
        # (missing card, network, parsing) falls back to an empty card.
        try:
            card = ModelCard.load(model_id, token=oauth_token.token)
        except Exception:
            card = ModelCard("")

        if card.data.tags is None:
            card.data.tags = []
        if "openvino" not in card.data.tags:
            card.data.tags.append("openvino")
        card.data.base_model = model_id
        card.text = _build_card_text(model_id, new_repo_id, auto_model_class)
        card_path = os.path.join(folder, "README.md")
        card.save(card_path)

        api.upload_file(
            path_or_fileobj=card_path,
            path_in_repo="README.md",
            repo_id=new_repo_id,
        )
        return (
            f"Uploaded successfully with {dtype} option! "
            f"Find your repo <a href='{new_repo_url}' target='_blank'>here</a>"
        )


def _resolve_task(task: str, model_id: str) -> str:
    """Map ``task`` to a supported task name, inferring it from the model when "auto"."""
    task = TasksManager.map_from_synonym(task)
    if task == "auto":
        try:
            task = TasksManager.infer_task_from_model(model_id)
        except Exception as e:
            raise ValueError(
                "The task could not be automatically inferred. "
                f"Please pass explicitely the task with the relevant task from {', '.join(TasksManager.get_all_tasks())}.  {e}"
            ) from e

    task = _TASK_ALIASES.get(task, task)
    if task not in _HEAD_TO_AUTOMODELS:
        raise ValueError(
            f"The task '{task}' is not supported, only {', '.join(_HEAD_TO_AUTOMODELS)} tasks are supported"
        )

    if task == "text2text-generation":
        raise ValueError("Export of Seq2Seq models is currently disabled.")
    return task


def _upload_tree(api, folder: str, repo_id: str) -> None:
    """Upload every file below ``folder`` to ``repo_id``, keeping relative paths."""
    for root, dirs, files in os.walk(folder):
        # Skip hidden directories (e.g. download-cache metadata); models saved
        # into sub-folders are uploaded with their sub-folder path preserved.
        dirs[:] = [name for name in dirs if not name.startswith(".")]
        for name in files:
            file_path = os.path.join(root, name)
            # Hub paths always use forward slashes, whatever the local OS.
            path_in_repo = os.path.relpath(file_path, folder).replace(os.sep, "/")
            try:
                api.upload_file(
                    path_or_fileobj=file_path,
                    path_in_repo=path_in_repo,
                    repo_id=repo_id,
                )
            except Exception as e:
                raise Exception(f"Error uploading file {file_path}: {e}") from e


def _build_card_text(model_id: str, new_repo_id: str, auto_model_class: str) -> str:
    """Return the README body describing how the model was exported and how to load it."""
    return dedent(
        f"""
        This model was exported to OpenVINO from [`{model_id}`](https://huggingface.co/{model_id}) using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.

        Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.

        First make sure you have optimum-intel installed:

        ```bash
        pip install optimum[openvino]
        ```

        To load your model you can do as follows:

        ```python
        from optimum.intel import {auto_model_class}

        model_id = "{new_repo_id}"
        model = {auto_model_class}.from_pretrained(model_id)
        ```
        """
    )


# Build the web UI. Components are created inside the Blocks context in the
# order they appear below, so the statement order here should not be changed
# without checking the resulting page.
with gr.Blocks() as demo:
    gr.Markdown("You must be logged in to use this space")
    gr.LoginButton(min_width=250)

    # Search box used to pick the source model on the Hub.
    model_id = HuggingfaceHubSearch(
        label="Hub Model ID",
        placeholder="Search for model id on the hub",
        search_type="model",
    )
    # Target weight precision; process_model treats "int8" as 8-bit and
    # anything else as 4-bit.
    dtype = gr.Dropdown(
        ["int8", "int4"],
        value="int8",
        label="Precision data types",
        filterable=False,
        visible=True,
    )
    private_repo = gr.Checkbox(
        value=False,
        label="Private Repo",
        info="Create a private repo under your username",
    )
    # Free-text task name; the default "auto" makes process_model infer it.
    task = gr.Textbox(
        value="auto",
        label="Task : can be left to auto, will be automatically inferred",
    )
    # Only four inputs are wired here although process_model takes a fifth
    # parameter (oauth_token) — presumably Gradio supplies it from the login
    # session based on the gr.OAuthToken annotation; confirm against Gradio docs.
    interface = gr.Interface(
        fn=process_model,
        inputs=[
            model_id,
            dtype,
            private_repo,
            task,
        ],
        outputs=[
            gr.Markdown(label="output"),
        ],
        title="Quantize your model with OpenVINO NNCF ⚡!",
        description="The space takes an HF repo as an input, quantize it and export it to OpenVINO, then push it to a repo under your HF user namespace.",
        api_name=False,
    )

    interface.render()

# Start the app as soon as the module is executed.
demo.launch()