# import os
# os.system("pip install flash-attn --no-build-isolation")
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from transformers import StopStringCriteria, StoppingCriteriaList
from datasets import load_dataset, concatenate_datasets
import torch
import threading
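
# Load the Aryabhata 1.0 tokenizer and model. The commented-out kwargs are the
# GPU + flash-attention configuration for when a GPU is available.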
model_id = "PhysicsWallahAI/Aryabhata-1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)  # , torch_dtype=torch.bfloat16, device_map="cuda", attn_implementation="flash_attention_2")


def process_questions(example):
    """Append the lettered answer options (A., B., ...) to the question text."""
    example["question_text"] = example["question"]
    options = "\n".join([f"{chr(65 + e)}. {op}" for e, op in enumerate(example["options"])])
    example["question_text"] += "\n" + options
    return example
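

# Build the demo's example questions from both JEE Main 2025 Math sessions
# (January and April papers).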
dataset = concatenate_datasets([
    load_dataset("PhysicsWallahAI/JEE-Main-2025-Math", "jan", split="test"),
    load_dataset("PhysicsWallahAI/JEE-Main-2025-Math", "apr", split="test"),
])
examples = list(dataset.map(process_questions, remove_columns=dataset.column_names)["question_text"])
print(examples[0])

# Strings that should end generation. The model occasionally emits chat-template
# markers (including malformed variants), so these also serve as suffixes to
# strip from the displayed output.
stop_strings = ["<|im_end|>", "<|end|>", "<im_start|>", "```python\n", "<|im_start|>", "]}}]}}]", " <im_start>"]


def strip_bad_tokens(s, stop_strings):
    """Remove a trailing stop string from the streamed text, if one is present."""
    for suffix in stop_strings:
        if s.endswith(suffix):
            return s[:-len(suffix)]
    return s
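

# Streaming callback for Gradio: builds the chat prompt, runs generation in a
# background thread, and yields the cleaned answer text incrementally.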
def generate_answer_stream(question):
    messages = [
        {'role': 'system', 'content': 'Think step-by-step; put only the final answer inside \\boxed{}.'},
        {'role': 'user', 'content': question},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer([text], return_tensors="pt")  # .to("cuda")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    stopping = StoppingCriteriaList([StopStringCriteria(tokenizer, stop_strings)])
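    # Run generate() in a background thread; TextIteratorStreamer lets this
    # function yield partial output to the UI as tokens arrive.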
    thread = threading.Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=4096,
            stopping_criteria=stopping,
        ),
    )
    thread.start()
    output = ""
    for token in streamer:
        output += token
        # Trim any stop string that made it into the streamed text.
        output = strip_bad_tokens(output, stop_strings)
        yield output
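

# Wire everything into a simple Gradio interface, with the JEE questions as
# clickable examples.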
demo = gr.Interface(
    fn=generate_answer_stream,
    inputs=gr.Textbox(lines=4, label="Enter a Math Question"),
    outputs=gr.Textbox(label="Model's Response"),
    examples=examples,
    title="Aryabhata 1.0",
    description="GPUs are disabled on this Space; we will host the model on a separate Space soon.",
)

if __name__ == "__main__":
    demo.launch()