rephrase description
app.py
CHANGED
@@ -95,11 +95,13 @@ def quantize_model(
     calibration_dataset = None
 
     is_int8 = dtype == "int8"
-    if library_name == "diffusers":
-        quant_method = "hybrid"
-    elif not is_int8 and calibration_dataset is not None:
+    # if library_name == "diffusers":
+    #     quant_method = "hybrid"
+    if not is_int8 and calibration_dataset is not None:
         quant_method = "awq"
     else:
+        if calibration_dataset is not None:
+            print("Default quantization was selected, calibration dataset won't be used")
         quant_method = "default"
 
     quantization_config = OVWeightQuantizationConfig(
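For context, this branch only selects the `quant_method` string that is later passed to `OVWeightQuantizationConfig`. A minimal sketch of how the pieces plausibly fit together in `optimum-intel` (the `dtype` and dataset values here are illustrative assumptions, not the Space's actual defaults):

```python
from optimum.intel import OVWeightQuantizationConfig

# Hypothetical inputs mirroring the diff's variables.
dtype = "int4"
calibration_dataset = "wikitext2"  # assumption: a dataset name accepted by optimum-intel

is_int8 = dtype == "int8"
if not is_int8 and calibration_dataset is not None:
    quant_method = "awq"  # data-aware 4-bit weight quantization
else:
    if calibration_dataset is not None:
        print("Default quantization was selected, calibration dataset won't be used")
    quant_method = "default"

quantization_config = OVWeightQuantizationConfig(
    bits=8 if is_int8 else 4,
    quant_method=quant_method,
    dataset=None if quant_method == "default" else calibration_dataset,
)
```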
@@ -112,7 +114,7 @@ def quantize_model(
 
         api = HfApi(token=oauth_token.token)
         if api.repo_exists(new_repo_id) and not overwritte:
-            return f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing
+            return f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repository"
 
         with TemporaryDirectory() as d:
             folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
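The guard above relies on `HfApi.repo_exists` from `huggingface_hub`; a small standalone sketch of the same pattern (token and repo id are placeholders):

```python
from huggingface_hub import HfApi

api = HfApi(token="hf_...")  # placeholder token
new_repo_id = "username/my-model-int8-ov"  # hypothetical target repository
overwritte = False  # the Space's (misspelled) flag name, kept as-is

# Refuse to push into an existing repository unless overwriting was requested.
if api.repo_exists(new_repo_id) and not overwritte:
    print(f"Model {new_repo_id} already exists, set overwritte=True to push anyway")
```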
@@ -130,7 +132,7 @@ def quantize_model(
             ov_model.save_pretrained(folder)
             new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
             new_repo_id = new_repo_url.repo_id
-            print("
+            print("Repository created successfully!", new_repo_url)
 
             folder = Path(folder)
             for dir_name in (
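Worth noting: `create_repo` returns a `RepoUrl` object whose `.repo_id` attribute is the fully qualified `namespace/name`, which is presumably why the code reassigns `new_repo_id` from it. A quick sketch:

```python
from huggingface_hub import HfApi

api = HfApi(token="hf_...")  # placeholder token
# Passing an unqualified name resolves it under the token's namespace.
new_repo_url = api.create_repo(repo_id="my-model-int8-ov", exist_ok=True, private=False)
print(new_repo_url)          # e.g. https://huggingface.co/username/my-model-int8-ov
print(new_repo_url.repo_id)  # e.g. username/my-model-int8-ov
```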
@@ -169,9 +171,9 @@ def quantize_model(
             card.data.base_model = model_id
             card.text = dedent(
                 f"""
-                This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and
+                This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and is converted to the OpenVINO format. This model was obtained via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space with [optimum-intel](https://github.com/huggingface/optimum-intel).
 
-                First make sure you have optimum-intel installed:
+                First make sure you have `optimum-intel` installed:
 
                 ```bash
                 pip install optimum[openvino]
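The card edit above uses `huggingface_hub.ModelCard`; a self-contained sketch of the same flow, with hypothetical ids standing in for the Space's variables:

```python
from textwrap import dedent
from huggingface_hub import ModelCard

model_id = "distilbert-base-uncased"  # hypothetical source model

# Start from the source model's card and record provenance in its metadata.
card = ModelCard.load(model_id)
card.data.base_model = model_id
card.text = dedent(
    f"""
    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}).
    """
)
card.save("README.md")  # the Space instead uploads it with api.upload_file(...)
```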
@@ -195,16 +197,16 @@ def quantize_model(
                 path_in_repo="README.md",
                 repo_id=new_repo_id,
             )
-            return f"This model was successfully quantized, find it under your
+            return f"This model was successfully quantized, find it under your repository {new_repo_url}"
         finally:
             shutil.rmtree(folder, ignore_errors=True)
     except Exception as e:
         return f"### Error: {e}"
 
 DESCRIPTION = """
-This Space uses [Optimum Intel](https://
+This Space uses [Optimum Intel](https://github.com/huggingface/optimum-intel) to automatically apply NNCF [Weight Only Quantization](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) (WOQ) on your model and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if not already.
 
-
+After conversion, a repository will be pushed under your namespace with the resulting model.
 
 The list of the supported architectures can be found in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/openvino/models)
 """
@@ -215,9 +217,9 @@ model_id = HuggingfaceHubSearch(
     search_type="model",
 )
 dtype = gr.Dropdown(
-    ["
-    value="
-    label="
+    ["8-bit", "4-bit"],
+    value="8-bit",
+    label="Weights precision",
     filterable=False,
     visible=True,
 )
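One possible refinement: `quantize_model` compares `dtype` against `"int8"`, while the dropdown now displays plain `"8-bit"`/`"4-bit"` strings, so unless the value is remapped elsewhere in the file the two may not line up. Gradio's `Dropdown` also accepts `(label, value)` tuples, which would keep the UI label decoupled from the internal value; a hedged sketch:

```python
import gradio as gr

# (label, value) choices: the user sees "8-bit" but the callback receives "int8",
# matching the `dtype == "int8"` check in quantize_model.
dtype = gr.Dropdown(
    choices=[("8-bit", "int8"), ("4-bit", "int4")],
    value="int8",
    label="Weights precision",
    filterable=False,
    visible=True,
)
```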
@@ -255,13 +257,13 @@ ratio = gr.Slider(
 )
 private_repo = gr.Checkbox(
     value=False,
-    label="Private
-    info="Create a private
+    label="Private repository",
+    info="Create a private repository instead of a public one",
 )
 overwritte = gr.Checkbox(
     value=False,
-    label="Overwrite
-    info="Enable pushing files on existing
+    label="Overwrite repository content",
+    info="Enable pushing files on existing repositories, potentially overwriting existing files",
 )
 interface = gr.Interface(
     fn=quantize_model,
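Finally, the components are wired into `gr.Interface`. A minimal, hypothetical sketch of how such an app might be assembled (the real Space passes more inputs, e.g. the calibration dataset and ratio slider, and relies on a login button for the OAuth token):

```python
import gradio as gr

# Minimal assembly; names reuse the components defined above.
interface = gr.Interface(
    fn=quantize_model,
    inputs=[model_id, dtype, private_repo, overwritte],  # subset of the real inputs
    outputs=gr.Markdown(label="Output"),                 # assumed output component
    title="Quantize your model with NNCF",               # hypothetical title
    description=DESCRIPTION,
)
interface.launch()
```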