Spaces:

davidkim205
/

kgrammar-2

Sleeping

App Files Files Community

kgrammar-2 / app.py

davidkim205

Update app.py

62b730b verified 18 days ago

raw

history blame contribute delete

7.25 kB

	import gradio as gr
	from huggingface_hub import InferenceClient


	# Function to create an Inference Client based on selected model
	def create_inference_client(model_name):
	    """Build an ``InferenceClient`` for the given model.

	    Args:
	        model_name: Model identifier, passed unchanged as the first
	            positional argument of ``InferenceClient``.

	    Returns:
	        A newly constructed ``InferenceClient``; a new instance is created
	        on every call.
	    """
	    client = InferenceClient(model_name)
	    return client


	# Function to generate a response
	def respond(
	    message,
	    system_message,
	    model,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Stream a chat completion for one user message.

	    A client is created for ``model`` on every call. The system and user
	    texts are sent as a two-message conversation and the reply is consumed
	    as a stream.

	    Args:
	        message: Text of the user turn.
	        system_message: Text of the system turn, placed before the user turn.
	        model: Model identifier handed to ``create_inference_client``.
	        max_tokens: Forwarded as ``max_tokens`` to ``chat_completion``.
	        temperature: Forwarded as ``temperature`` to ``chat_completion``.
	        top_p: Forwarded as ``top_p`` to ``chat_completion``.

	    Yields:
	        The response text accumulated so far, once for each streamed chunk
	        that carries content.
	    """
	    client = create_inference_client(model)

	    messages = [
	        {"role": "system", "content": system_message},
	        {"role": "user", "content": message},
	    ]

	    response = ""
	    # A distinct loop name keeps the ``message`` argument from being shadowed.
	    for chunk in client.chat_completion(
	        messages,
	        max_tokens=max_tokens,
	        stream=True,
	        temperature=temperature,
	        top_p=top_p,
	    ):
	        # A streamed chunk may have no choices, or a delta whose content is
	        # None; concatenating None onto a str raises TypeError, so such
	        # chunks are skipped instead of aborting the stream.
	        if not chunk.choices:
	            continue
	        token = chunk.choices[0].delta.content
	        if token is None:
	            continue
	        response += token
	        # Echo the running response to stdout, as the original code did.
	        print(response)
	        yield response


	def main():
	    """Build the kgrammar demo UI with Gradio Blocks and launch it.

	    The page shows a title, a links line, three descriptive sections, a
	    style block for the submit button, an input column (user message,
	    system message, model choice and sampling sliders) next to an output
	    textbox, and a set of clickable examples. Clicking the submit button
	    calls ``respond`` with the six input components and writes its output
	    to the output textbox.
	    """
	    # NOTE(review): the nesting of the ``with`` blocks below was reconstructed
	    # from a source whose indentation had been flattened - confirm the layout.

	    # Default system prompt; shared by the system-message textbox and by
	    # every example row instead of repeating the literal four times.
	    system_prompt = "한국어 문맥상 부자연스러운 부분을 찾으시오. 오류 문장과 개수는 <incorrect grammar> </incorrect grammar> tag, 즉 <incorrect grammar> - 오류 문장과 설명 </incorrect grammar> 안에 담겨 있으며, <wrong count> </wrong count> tag, 즉 <wrong count> 오류 개수 </wrong count> 이다."

	    about_model_html = """<span style="font-size: 23px;">The kgrammar model is a state-of-the-art language assessment model designed to evaluate Korean sentences, specifically focusing on detecting instances where a response deviates by incorporating foreign languages or mixing multiple languages within a sentence.</span></br>
	<span style="font-size: 23px;">Built on the Gemma-2-9B architecture, kgrammar aims to ensure language consistency and clarity in Korean text, making it a valuable tool for assessing and refining language models that generate Korean responses.</span> </br></br>
	"""

	    about_dataset_html = """<span style="font-size: 23px;">The kgrammar-testset was created to train and validate the kgrammar model. This dataset was generated using GPT-4o, incorporating predefined prompts designed to introduce grammatical errors into responses when a question was given.</span></br>
	<span style="font-size: 23px;">To ensure a balanced distribution, the dataset consists of 50% general questions, 25% math-related questions, 25% coding-related questions. This structure helps prevent mathematical expressions and code from being mistakenly identified as language errors.</span> </br></br>
	"""

	    about_versions_html = """<span style="font-size: 23px;">kgrammar currently has the following versions available → kgrammar-2-1b, kgrammar-2-3b.</span></br></br>
	"""

	    example_user_messages = [
	        "우선, 성인 티켓의 가격은 $4입니다. 총 판매된 티켓 수는 59장이며, 학생 티켓은 9장입니다. 我们需要计算学生票价。성인 티켓은 50장이 판매되었으므로, 성인 티켓으로 얻은 수익은 50 * 4 = $200입니다. Total revenue是 $222.50, 所以学生票的收入为 $222.50 - $200 = $22.50。俄语로 说, 每张学生票的价格 = $22.50 ÷ 9 = $2.50。학생 티켓의 가격은 2.5ดอลล่าครับ.",
	        "슈트라우스의 음악에서 모차르트의 음악 형태를 띈 악장의 끝에는 호른이 중심적으로 연주됩니다. Это инструмент, который связывает эпохи и стили, создавая неповторимую атмосферу и оставляя слушателей в восхищении.",
	        "충남도가 백신 접종 안내 및 이상반응 모니터링을 위해 협정을 맺은 곳은 SK텔레콤(SKT)입니다. SKT의 AI 기술인 '누구(NUGU)'를 활용한 '누구 백신 케어콜' 서비스를 통해 이 업무를 수행할 계획입니다.",
	    ]
	    # Each row supplies the user message and system prompt; the remaining
	    # four slots (model and the three sliders) are given as None.
	    examples_list = [
	        [user_text, system_prompt, None, None, None, None]
	        for user_text in example_user_messages
	    ]

	    with gr.Blocks() as app:
	        gr.Markdown("# 🤖 Checking Korean Grammar Accuracy with kgrammar")
	        gr.Markdown("")
	        # Raw string: keeps the backslash-pipe text byte-for-byte while
	        # avoiding the invalid "\|" escape sequence of a normal literal.
	        gr.Markdown(r"## \| [Model](https://huggingface.co/collections/davidkim205/keval-2-67ac5400f5eef4984cc5dbbb) \| [Paper](https://davidkim205.github.io/keval.html) \| [Code](https://github.com/davidkim205/simple-keval) \|")
	        gr.Markdown("")
	        gr.Markdown("## What Is kgrammar?")
	        gr.Markdown(about_model_html)
	        gr.Markdown("## kgrammar Dataset")
	        gr.Markdown(about_dataset_html)
	        gr.Markdown("## Available kgrammar Models")
	        gr.Markdown(about_versions_html)

	        # Styling for the button created below with elem_id="submit-btn".
	        gr.HTML("""<style>
	#submit-btn {
	background-color: #FF7F00;
	color: white;
	border: none;
	padding: 10px 20px;
	font-size: 16px;
	cursor: pointer;
	}
	#submit-btn:hover {
	background-color: #DA8A67;
	}
	</style>
	""")

	        gr.Markdown("## 🖱️ Try it out!")
	        with gr.Row():
	            with gr.Column(scale=1):
	                user_box = gr.Textbox(label="User Message")
	                system_box = gr.Textbox(label="System message", value=system_prompt)
	                model_choice = gr.Dropdown(
	                    label="Model Selection",
	                    choices=["davidkim205/kgrammar-2-1b", "davidkim205/kgrammar-2-3b"],
	                    value="davidkim205/kgrammar-2-1b",
	                )
	                max_tokens_slider = gr.Slider(label="Max new tokens", minimum=1, maximum=2048, value=1024, step=1)
	                temperature_slider = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, value=1.0, step=0.1)
	                top_p_slider = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
	                submit = gr.Button("Submit", elem_id="submit-btn")
	            with gr.Column(scale=1):
	                output = gr.Textbox(label="Output")

	        # Order matches the positional parameters of ``respond`` and the
	        # column order of each example row.
	        input_components = [
	            user_box,
	            system_box,
	            model_choice,
	            max_tokens_slider,
	            temperature_slider,
	            top_p_slider,
	        ]

	        gr.Markdown("")
	        gr.Markdown("## Examples")
	        gr.Examples(
	            label="Input Example",
	            examples=examples_list,
	            inputs=input_components,
	        )

	        submit.click(
	            fn=respond,
	            inputs=input_components,
	            outputs=output,
	        )

	    app.launch()


	# Start the demo only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    main()