ImageEdit-GOT-OCR

Paused

Tonic committed on Nov 26, 2023

Commit

aa91997

1 Parent(s): b9eff4b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,6 +15,16 @@ model_name = "allenai/tulu-2-dpo-13b"
 tokenizer = AutoTokenizer.from_pretrained("allenai/tulu-2-dpo-13b")
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
 class TuluChatBot:
     def __init__(self, model, tokenizer, system_message="You are 🌷Tulu, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."):
         self.model = model
@@ -30,7 +40,7 @@ class TuluChatBot:
     def Tulu(self, user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
         prompt = self.format_prompt(user_message)
-        inputs = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
         input_ids = inputs["input_ids"].to(self.model.device)
         attention_mask = inputs["attention_mask"].to(self.model.device)

 tokenizer = AutoTokenizer.from_pretrained("allenai/tulu-2-dpo-13b")
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
+bos_token_id = 1
+eos_token_id = 2
+tokenizer.bos_token_id = bos_token_id
+tokenizer.eos_token_id = eos_token_id
+model.config.bos_token_id = bos_token_id
+model.config.eos_token_id = eos_token_id
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+model.config.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
 class TuluChatBot:
     def __init__(self, model, tokenizer, system_message="You are 🌷Tulu, an AI language model created by Tonic-AI. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."):
         self.model = model
     def Tulu(self, user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
         prompt = self.format_prompt(user_message)
+        inputs = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=True)
         input_ids = inputs["input_ids"].to(self.model.device)
         attention_mask = inputs["attention_mask"].to(self.model.device)