Spaces:

PhysicsWallahAI
/

Aryabhata-Demo

Running

App Files Community

pw-ai-research committed 25 days ago

Commit

fdedf62

verified ·

1 Parent(s): 901a060

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -45

app.py CHANGED Viewed

@@ -6,11 +6,12 @@ from transformers import StopStringCriteria, StoppingCriteriaList
 from datasets import load_dataset, concatenate_datasets
 import torch
-import threading
-model_id = "PhysicsWallahAI/Aryabhata-1.0"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
 def process_questions(example):
     example["question_text"] = example["question"]
@@ -26,53 +27,14 @@ dataset = concatenate_datasets([
 examples = dataset.map(process_questions, remove_columns=dataset.column_names)["question_text"]
-# add options
-stop_strings = ["<|im_end|>", "<|end|>", "<im_start|>", "```python\n", "<|im_start|>", "]}}]}}]"]
-def strip_bad_tokens(s, stop_strings):
-    for suffix in stop_strings:
-        if s.endswith(suffix):
-            return s[:-len(suffix)]
-    return s
 def generate_answer_stream(question):
     messages = [
         {'role': 'system', 'content': 'Think step-by-step; put only the final answer inside \\boxed{}.'},
         {'role': 'user', 'content': question}
     ]
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    inputs = tokenizer([text], return_tensors="pt")
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    stopping = StoppingCriteriaList([StopStringCriteria(tokenizer, stop_strings)])
-    thread = threading.Thread(
-        target=model.generate,
-        kwargs=dict(
-            **inputs,
-            streamer=streamer,
-            max_new_tokens=4096,
-            stopping_criteria=stopping,
-        )
-    )
-    thread.start()
-    output = ""
-    for token in streamer:
-        print(token)
-        output += token
-        output = strip_bad_tokens(output, stop_strings)
-        yield output
 demo = gr.Interface(
     fn=generate_answer_stream,

 from datasets import load_dataset, concatenate_datasets
 import torch
+from vllm import LLM, SamplingParams
+llm = LLM(model="PhysicsWallahAI/Aryabhata-1.0")
+sampling_params = SamplingParams(temperature=0.0, max_tokens=4*1024, stop=["<|im_end|>", "<|end|>", "<im_start|>", "```python\n", "<|im_start|>", "]}}]}}]"])
 def process_questions(example):
     example["question_text"] = example["question"]
 examples = dataset.map(process_questions, remove_columns=dataset.column_names)["question_text"]
 def generate_answer_stream(question):
     messages = [
         {'role': 'system', 'content': 'Think step-by-step; put only the final answer inside \\boxed{}.'},
         {'role': 'user', 'content': question}
     ]
+    results = llm.chat(messages, sampling_params)
+    return results[0].outputs[0].text.strip()
 demo = gr.Interface(
     fn=generate_answer_stream,