import os
import shutil
import gradio as gr
from huggingface_hub import HfApi, whoami, ModelCard, model_info
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from textwrap import dedent
from pathlib import Path
from tempfile import TemporaryDirectory
from huggingface_hub.file_download import repo_folder_name
from optimum.exporters import TasksManager
from optimum.intel import (
    OVModelForAudioClassification,
    OVModelForCausalLM,
    OVModelForFeatureExtraction,
    OVModelForImageClassification,
    OVModelForMaskedLM,
    OVModelForQuestionAnswering,
    OVModelForSeq2SeqLM,
    OVModelForSequenceClassification,
    OVModelForTokenClassification,
    OVStableDiffusionPipeline,
    OVStableDiffusionXLPipeline,
    OVLatentConsistencyModelPipeline,
    OVWeightQuantizationConfig,
)
from diffusers import ConfigMixin
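
# Map each supported task to the optimum-intel class used to load, quantize and export the model.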
_HEAD_TO_AUTOMODELS = {
"feature-extraction": "OVModelForFeatureExtraction",
"fill-mask": "OVModelForMaskedLM",
"text-generation": "OVModelForCausalLM",
"text-classification": "OVModelForSequenceClassification",
"token-classification": "OVModelForTokenClassification",
"question-answering": "OVModelForQuestionAnswering",
"image-classification": "OVModelForImageClassification",
"audio-classification": "OVModelForAudioClassification",
"stable-diffusion": "OVStableDiffusionPipeline",
"stable-diffusion-xl": "OVStableDiffusionXLPipeline",
"latent-consistency": "OVLatentConsistencyModelPipeline",
}


def quantize_model(
    model_id: str,
    dtype: str,
    calibration_dataset: str,
    ratio: float,
    private_repo: bool,
    overwritte: bool,
    oauth_token: gr.OAuthToken,
):
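    """Apply NNCF weight-only quantization to `model_id`, export it to OpenVINO
    and push the result to a new repository under the user's namespace."""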
    if oauth_token.token is None:
        return "You must be logged in to use this space"
    if not model_id:
        return f"### Invalid input 🐞 Please specify a model name, got {model_id}"

    try:
        model_name = model_id.split("/")[-1]
        username = whoami(oauth_token.token)["name"]
        w_t = dtype.replace("-", "")
        suffix = f"{w_t}" if model_name.endswith("openvino") else f"openvino-{w_t}"
        new_repo_id = f"{username}/{model_name}-{suffix}"
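
        # Infer whether the model is a diffusers pipeline or a transformers model to determine the export task.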
        library_name = TasksManager.infer_library_from_model(model_id, token=oauth_token.token)

        if library_name == "diffusers":
            ConfigMixin.config_name = "model_index.json"
            class_name = ConfigMixin.load_config(model_id, token=oauth_token.token)["_class_name"].lower()
            if "xl" in class_name:
                task = "stable-diffusion-xl"
            elif "consistency" in class_name:
                task = "latent-consistency"
            else:
                task = "stable-diffusion"
        else:
            task = TasksManager.infer_task_from_model(model_id, token=oauth_token.token)

        if task == "text2text-generation":
            return "Export of Seq2Seq models is currently disabled."

        if task not in _HEAD_TO_AUTOMODELS:
            return f"The task '{task}' is not supported, only the following tasks are: {', '.join(_HEAD_TO_AUTOMODELS.keys())}"

        auto_model_class = _HEAD_TO_AUTOMODELS[task]
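
        # Data-aware AWQ is only used for 4-bit quantization with a calibration dataset;
        # otherwise data-free (default) weight quantization is applied and any provided dataset is ignored.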
if calibration_dataset == "None":
calibration_dataset = None
is_int8 = dtype == "8-bit"
# if library_name == "diffusers":
# quant_method = "hybrid"
if not is_int8 and calibration_dataset is not None:
quant_method = "awq"
else:
if calibration_dataset is not None:
print("Default quantization was selected, calibration dataset won't be used")
quant_method = "default"
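
        # Weight-only quantization config: for 8-bit the ratio is forced to 1.0, and the calibration
        # dataset and sample count are only passed for data-aware (AWQ) quantization.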
        quantization_config = OVWeightQuantizationConfig(
            bits=8 if is_int8 else 4,
            quant_method=quant_method,
            dataset=None if quant_method == "default" else calibration_dataset,
            ratio=1.0 if is_int8 else ratio,
            num_samples=None if quant_method == "default" else 20,
        )

        api = HfApi(token=oauth_token.token)
        if api.repo_exists(new_repo_id) and not overwritte:
            return f"Model {new_repo_id} already exists, please tick the overwrite box to push to an existing repository"
        with TemporaryDirectory() as d:
            folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
            os.makedirs(folder)
            try:
                api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
                ov_model = eval(auto_model_class).from_pretrained(
                    model_id,
                    cache_dir=folder,
                    token=oauth_token.token,
                    quantization_config=quantization_config,
                )
                ov_model.save_pretrained(folder)

                new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
                new_repo_id = new_repo_url.repo_id
                print("Repository created successfully!", new_repo_url)

                folder = Path(folder)
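                # Upload every generated file; the diffusers sub-folders only exist for diffusion pipelines.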
                for dir_name in (
                    "",
                    "vae_encoder",
                    "vae_decoder",
                    "text_encoder",
                    "text_encoder_2",
                    "unet",
                    "tokenizer",
                    "tokenizer_2",
                    "scheduler",
                    "feature_extractor",
                ):
                    if not (folder / dir_name).is_dir():
                        continue
                    for file_path in (folder / dir_name).iterdir():
                        if file_path.is_file():
                            try:
                                api.upload_file(
                                    path_or_fileobj=file_path,
                                    path_in_repo=os.path.join(dir_name, file_path.name),
                                    repo_id=new_repo_id,
                                )
                            except Exception as e:
                                return f"Error uploading file {file_path}: {e}"
                try:
                    card = ModelCard.load(model_id, token=oauth_token.token)
                except Exception:
                    card = ModelCard("")
                if card.data.tags is None:
                    card.data.tags = []
                if "openvino" not in card.data.tags:
                    card.data.tags.append("openvino")
                card.data.tags.append("nncf")
                card.data.tags.append(dtype)
                card.data.base_model = model_id

                card.text = dedent(
                    f"""
                    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and is converted to the OpenVINO format. This model was obtained via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space with [optimum-intel](https://github.com/huggingface/optimum-intel).

                    First make sure you have `optimum-intel` installed:

                    ```bash
                    pip install optimum[openvino]
                    ```

                    To load your model you can do as follows:

                    ```python
                    from optimum.intel import {auto_model_class}

                    model_id = "{new_repo_id}"
                    model = {auto_model_class}.from_pretrained(model_id)
                    ```
                    """
                )
                card_path = os.path.join(folder, "README.md")
                card.save(card_path)

                api.upload_file(
                    path_or_fileobj=card_path,
                    path_in_repo="README.md",
                    repo_id=new_repo_id,
                )
return f"This model was successfully quantized, find it under your repository {new_repo_url}"
finally:
shutil.rmtree(folder, ignore_errors=True)
except Exception as e:
return f"### Error: {e}"


DESCRIPTION = """
This Space uses [Optimum Intel](https://github.com/huggingface/optimum-intel) to automatically apply NNCF [Weight Only Quantization](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) (WOQ) on your model and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if it is not already.

After conversion, a repository will be pushed under your namespace with the resulting model.

The list of supported architectures can be found in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/openvino/models).
"""
model_id = HuggingfaceHubSearch(
    label="Hub Model ID",
    placeholder="Search for model id on the hub",
    search_type="model",
)

dtype = gr.Dropdown(
    ["8-bit", "4-bit"],
    value="8-bit",
    label="Weights precision",
    filterable=False,
    visible=True,
)

"""
quant_method = gr.Dropdown(
    ["default", "awq", "hybrid"],
    value="default",
    label="Quantization method",
    filterable=False,
    visible=True,
)
"""

calibration_dataset = gr.Dropdown(
    [
        "None",
        "wikitext2",
        "c4",
        "c4-new",
        "conceptual_captions",
        "laion/220k-GPT4Vision-captions-from-LIVIS",
        "laion/filtered-wit",
    ],
    value="None",
    label="Calibration dataset",
    filterable=False,
    visible=True,
)

ratio = gr.Slider(
    label="Ratio",
    info="Parameter used when applying 4-bit quantization to control the ratio between 4-bit and 8-bit quantization",
    minimum=0.0,
    maximum=1.0,
    step=0.1,
    value=1.0,
)

private_repo = gr.Checkbox(
    value=False,
    label="Private repository",
    info="Create a private repository instead of a public one",
)

overwritte = gr.Checkbox(
    value=False,
    label="Overwrite repository content",
    info="Enable pushing files to existing repositories, potentially overwriting existing files",
)
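
# The `oauth_token` parameter of `quantize_model` is not listed in `inputs`:
# Gradio injects it automatically for functions that take a `gr.OAuthToken` argument.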
interface = gr.Interface(
    fn=quantize_model,
    inputs=[
        model_id,
        dtype,
        calibration_dataset,
        ratio,
        private_repo,
        overwritte,
    ],
    outputs=[
        gr.Markdown(label="output"),
    ],
    title="Quantize your model with NNCF",
    description=DESCRIPTION,
    api_name=False,
)

with gr.Blocks() as demo:
    gr.Markdown("You must be logged in to use this space")
    gr.LoginButton(min_width=250)
    interface.render()

demo.launch()