Update app.py
app.py CHANGED
@@ -132,10 +132,10 @@ class StopOnTokens(StoppingCriteria):
 
 def initialize_model():
     quantization_config = BitsAndBytesConfig(
-
-
-
-
+        load_in_8bit=True,
+        bnb_8bit_compute_dtype=torch.bfloat16,
+        bnb_8bit_quant_type="nf4",
+        bnb_8bit_use_double_quant=True,
     )
 
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
@@ -147,14 +147,14 @@ def initialize_model():
         quantization_config=quantization_config,
         torch_dtype=torch.bfloat16,
         trust_remote_code=True
-    )
+    )
 
     return model, tokenizer
 
 def format_response(text):
     return text.replace("[Understand]", '\n<strong class="special-tag">[Understand]</strong>\n') \
-        .replace("[
-        .replace("[
+        .replace("[/Reason]", '\n<strong class="special-tag">[/Reason]</strong>\n') \
+        .replace("[/Answer]", '\n<strong class="special-tag">[/Answer]</strong>\n') \
         .replace("[Reason]", '\n<strong class="special-tag">[Reason]</strong>\n') \
         .replace("[Answer]", '\n<strong class="special-tag">[Answer]</strong>\n')
 @spaces.GPU(duration=360)
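For orientation, a minimal sketch of how the patched pieces fit together (not the Space's full app.py): MODEL_ID is a placeholder for whichever model repo the Space actually loads, device_map="auto" is an assumption since the start of the from_pretrained call is outside the hunks above, and the sketch keeps only the load_in_8bit flag from the quantization config shown in the diff.

# Minimal sketch under the assumptions stated above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "your-org/your-model"  # placeholder: the Space defines its own MODEL_ID

def initialize_model():
    # 8-bit bitsandbytes quantization; only the load_in_8bit flag from the
    # diffed config is kept in this sketch.
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",  # assumption: not visible in the hunk
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    return model, tokenizer

def format_response(text):
    # Wrap the opening and (newly added) closing tags in styled <strong> elements.
    return text.replace("[Understand]", '\n<strong class="special-tag">[Understand]</strong>\n') \
        .replace("[/Reason]", '\n<strong class="special-tag">[/Reason]</strong>\n') \
        .replace("[/Answer]", '\n<strong class="special-tag">[/Answer]</strong>\n') \
        .replace("[Reason]", '\n<strong class="special-tag">[Reason]</strong>\n') \
        .replace("[Answer]", '\n<strong class="special-tag">[Answer]</strong>\n')

# Example: the closing tags now receive the same styling as the opening ones.
print(format_response("[Reason]model reasoning[/Reason][Answer]42[/Answer]"))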