Spaces:

prithivMLmods
/

core-OCR

Running on Zero

App Files Files Community

prithivMLmods committed on Jan 22

Commit

1d74de7

verified ·

1 Parent(s): bb36c83

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -3

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ DESCRIPTION = """
 # QwQ Distill
 """
-css= '''
 h1 {
   text-align: center;
   display: block;
@@ -40,6 +40,9 @@ model = AutoModelForCausalLM.from_pretrained(
 model.config.sliding_window = 4096
 model.eval()
 @spaces.GPU(duration=120)
 def generate(
@@ -54,15 +57,23 @@ def generate(
     conversation = chat_history.copy()
     conversation.append({"role": "user", "content": message})
-    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        {"input_ids": input_ids},
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
@@ -71,6 +82,7 @@ def generate(
         temperature=temperature,
         num_beams=1,
         repetition_penalty=repetition_penalty,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()

 # QwQ Distill
 """
+css = '''
 h1 {
   text-align: center;
   display: block;
 model.config.sliding_window = 4096
 model.eval()
+# Set the pad token ID if it's not already set
+if tokenizer.pad_token_id is None:
+    tokenizer.pad_token_id = tokenizer.eos_token_id
 @spaces.GPU(duration=120)
 def generate(
     conversation = chat_history.copy()
     conversation.append({"role": "user", "content": message})
+    # Apply chat template and get input_ids and attention_mask
+    inputs = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
+    input_ids = inputs["input_ids"]
+    attention_mask = inputs["attention_mask"]
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        attention_mask = attention_mask[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
+    attention_mask = attention_mask.to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
+        input_ids=input_ids,
+        attention_mask=attention_mask,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
         num_beams=1,
         repetition_penalty=repetition_penalty,
+        pad_token_id=tokenizer.pad_token_id,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()