MekkCyber committed on
Commit
1fdbd50
·
1 Parent(s): 82f366f
Files changed (1) hide show
  1. app.py +30 -27
app.py CHANGED
@@ -91,9 +91,9 @@ def save_model(model, model_name, quantization_type, group_size=128, username=No
91
  repo_name = f"{username}/{quantized_model_name}"
92
  else :
93
  if quantization_type == "int4_weight_only" :
94
- repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}-gs_{group_size}"
95
  else :
96
- repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{quantization_type.lower()}"
97
 
98
  model_card = create_model_card(repo_name, quantization_type, group_size)
99
  with open(os.path.join(tmpdirname, "README.md"), "w") as f:
@@ -143,31 +143,34 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
143
 
144
  with gr.Row():
145
  with gr.Column():
146
- model_name = HuggingfaceHubSearch(
147
- label="Hub Model ID",
148
- placeholder="Search for model id on Huggingface",
149
- search_type="model",
150
- )
151
- quantization_type = gr.Dropdown(
152
- label="Quantization Type",
153
- choices=["int4_weight_only", "int8_weight_only", "int8_dynamic_activation_int8_weight"],
154
- value="int8_weight_only"
155
- )
156
- group_size = gr.Number(
157
- label="Group Size (only for int4_weight_only)",
158
- value=128,
159
- interactive=True
160
- )
161
- # device = gr.Dropdown(
162
- # label="Device (int4 only works with cuda)",
163
- # choices=["cuda", "cpu"],
164
- # value="cuda"
165
- # )
166
- quantized_model_name = gr.Textbox(
167
- label="Model Name (optional : to override default)",
168
- value="",
169
- interactive=True
170
- )
 
 
 
171
  # with gr.Row():
172
  # username = gr.Textbox(
173
  # label="Hugging Face Username",
 
91
  repo_name = f"{username}/{quantized_model_name}"
92
  else :
93
  if quantization_type == "int4_weight_only" :
94
+ repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}-gs{group_size}"
95
  else :
96
+ repo_name = f"{username}/{model_name.split('/')[-1]}-torchao-{MAP_QUANT_TYPE_TO_NAME[quantization_type.lower()]}"
97
 
98
  model_card = create_model_card(repo_name, quantization_type, group_size)
99
  with open(os.path.join(tmpdirname, "README.md"), "w") as f:
 
143
 
144
  with gr.Row():
145
  with gr.Column():
146
+ with gr.Row():
147
+ model_name = HuggingfaceHubSearch(
148
+ label="Hub Model ID",
149
+ placeholder="Search for model id on Huggingface",
150
+ search_type="model",
151
+ )
152
+
153
+ with gr.Row():
154
+ quantization_type = gr.Dropdown(
155
+ label="Quantization Type",
156
+ choices=["int4_weight_only", "int8_weight_only", "int8_dynamic_activation_int8_weight"],
157
+ value="int8_weight_only"
158
+ )
159
+ group_size = gr.Number(
160
+ label="Group Size (only for int4_weight_only)",
161
+ value=128,
162
+ interactive=True
163
+ )
164
+ # device = gr.Dropdown(
165
+ # label="Device (int4 only works with cuda)",
166
+ # choices=["cuda", "cpu"],
167
+ # value="cuda"
168
+ # )
169
+ quantized_model_name = gr.Textbox(
170
+ label="Model Name (optional : to override default)",
171
+ value="",
172
+ interactive=True
173
+ )
174
  # with gr.Row():
175
  # username = gr.Textbox(
176
  # label="Hugging Face Username",