Spaces:

Didier
/

Text_translation

Sleeping

Didier committed on Sep 16, 2024

Commit

5df3581

1 Parent(s): 9f5045e

Using BitsAndBytesConfig

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,18 +9,31 @@ Date: 2024-09-07
 import spaces
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import gradio as gr
 #
 # Load the "small" MADLAD400 model
 #
 model_name = "google/madlad400-10b-mt"
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
 model = AutoModelForSeq2SeqLM.from_pretrained(
     model_name,
     device_map="auto",
     torch_dtype=torch.float16,
-    load_in_8bit=True)
 model = torch.compile(model)
 #
@@ -37,6 +50,8 @@ def translate_text(
     Input text will be split into chunk that will be translated sequentially.
     We will have up to sents_per_chunk sentences in a given chunk.
     """
     input_text = f"<2{tgt_lang}> {text}"
     input_ids = tokenizer(
         input_text, return_tensors="pt",

 import spaces
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import BitsAndBytesConfig
 import gradio as gr
 #
 # Load the "small" MADLAD400 model
 #
 model_name = "google/madlad400-10b-mt"
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
+)
+#quantization_config = BitsAndBytesConfig(
+#    load_in_8bit=True,
+#    llm_int8_threshold=200.0 # https://discuss.huggingface.co/t/correct-usage-of-bitsandbytesconfig/33809/5
+#)
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
 model = AutoModelForSeq2SeqLM.from_pretrained(
     model_name,
     device_map="auto",
     torch_dtype=torch.float16,
+    quantization_config=quantization_config)
 model = torch.compile(model)
 #
     Input text will be split into chunk that will be translated sequentially.
     We will have up to sents_per_chunk sentences in a given chunk.
     """
+    if not tgt_lang:
+        tgt_lang = "en"
     input_text = f"<2{tgt_lang}> {text}"
     input_ids = tokenizer(
         input_text, return_tensors="pt",