davidkim205 committed on
Commit
6b776ea
·
verified ·
1 Parent(s): 3865332

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -13
app.py CHANGED
@@ -38,16 +38,17 @@ def respond(
38
 
39
 
40
  def main():
41
- description_text = """
42
- </br><span style="font-size: 22px;">Use <strong>Keval</strong>, an offline-ready evaluation framework for Korean AI models, to assess whether an LLM-generated response is relevant and appropriate for a given question. Submit the inputs to generate the evaluation results.</span></br></br>
43
- <span style="font-size: 22px;">1️⃣ <strong>Question</strong>: Provide a question from the ko-bench dataset.</span> </br>
44
- <span style="font-size: 22px;">2️⃣ <strong>Answer</strong>: Input the LLM-generated response to the Question.</span> </br>
45
- <span style="font-size: 22px;">3️⃣ <strong>System Message</strong>: Define the prompt used for evaluation.</span> </br>
46
- <span style="font-size: 22px;">4️⃣ <strong>Model Selection</strong>: Choose a Keval model for assessment.</span> </br>
47
- <span style="font-size: 22px;">5️⃣ <strong>Max New Tokens</strong>: Set the maximum number of tokens for the evaluation output.</span> </br>
48
- <span style="font-size: 22px;">6️⃣ <strong>Temperature</strong>: Adjust the temperature to balance predictability and creativity in Keval’s evaluation.</span> </br>
49
- <span style="font-size: 22px;">7️⃣ <strong>Top-p (Nucleus Sampling)</strong>: Modify the top-p value to control fluency and diversity in the evaluation output.</span> </br></br>
50
- <span style="font-size: 22px;"><a href="https://huggingface.co/davidkim205/keval-2-9b" target="_blank" style="text-decoration: none; color: #000080;">Keval model link << Click here!</a></span> </br></br>
 
51
  """
52
 
53
  examples_list = [["최근 제주도 여행을 다녀오면서 꼭 가봐야 할 명소를 강조하는 재미있는 여행 블로그 글을 작성하시오.","## 제주도? 오늘부터 \"제주도 탐험가\"로 찍히는 법! 🏝️🔥\n\n제주도, 그 자체로도 멋진 이름이죠? 바다가 그려낸 섬, 하늘이 빚어낸 자연, 그리고 인간의 손길이 만든 매력까지! 제주 탐험가가 되기 위해 꼭 가봐야 할 명소들, 뭐가 있을까요? 여기 딱히 잡채처럼 기대되는 순간이 있지 않나요? 그 순간을 만드는 곳들이 바로 이곳이라면, 뭐가 들었나 보세요!\n\n**1. 유명한 곳도, 숨겨진 보석도 탐험해봐야지!**\n\n제주도에서 빠질 수 없는 명소죠. 한라산의 신비한 자연 속에 숨겨진 숲길을 걸으며, 바다 너머의 푸른 끝을 엿볼 수 있는 한라산 등반은 잊지 못할 경험을 선사할 거예요. 하지만 이곳의 매력은 그 자체로 끝나지 않아요. 숨겨진 보석 같은 곳들을 찾아내는 것이 제주 탐험가의 재미죠! 예를 들어, 오름 중심부에 자리 잡은 천칭암은 바다가 빚어낸 예술 작품처럼 신비롭고, 지리산 주변의 옛 민속마을은 시간이 멈춘 듯한 순간을 선사할 거예요.\n\n**2. 맛있는 음식도 탐험의 일환이지!**\n\n제주도의 맛은 탐험가의 힘을 북돋아주죠! 싱싱한 해산물로 만든 회와 멍게, 그리고 맛있는 한식과 함께, 제주도의 특색을 담은 음식들을 맛보세요. 특히, 한라산 근처의 숨겨진 한식당에서는 맛집들이 모여 있어요. 이곳에서 얻는 맛의 즐거움은 탐험의 피로를 씻어내는 최고의 보상이 될 거예요.\n\n**3. 자연과 함께하는 액티비티는 필수!**\n\n제주도의 자연은 탐험가를 위한 무한한 놀이터죠! 서핑, 스노클링, 하이킹 등 다양한 액티비티를 통해 자연과의 교감을 경험해보세요. 예를 들어, 제주 해안선을 따라 달리는 바다 위 자전거 타기나, 숨겨진 해변에서의 캠핑은 잊지 못할 추억을 선사할 거예요.\n\n**4. 제주만의 문화와 예술도 탐험해보세요!**\n\n제주도는 단순한 자연의 아름다움만이 아닌, 독특한 문화와 예술로도 유명해요. 
예를 들어, 제주 민속촌이나 오름 박물관은 제주의 역사와 문화를 엿볼 수 있는 곳이며, 제주 특유의 공예품을 구경하며 즐거운 시간을 보낼 수 있어요.\n\n이제 제주도 탐험가가 되는 준비가 끝났으니, 떠나 보세요! 제주도의 매력을 느끼며, 자신만의 특별한 여행 이야기를 만들어보세요. 그리고 잊지 마세요, 제주는 당신이 다시 한번 돌아올 만한 곳이라는 사실을! 😉",None, None, None, None, None],
@@ -56,8 +57,13 @@ def main():
56
  ]
57
 
58
  with gr.Blocks() as app:
59
- gr.Markdown("# Evaluating LLM Responses with Keval")
60
- gr.Markdown(description_text)
 
 
 
 
 
61
 
62
  gr.HTML("""<style>
63
  #submit-btn {
@@ -74,7 +80,7 @@ def main():
74
  </style>
75
  """)
76
 
77
- gr.Markdown("## Try it out!")
78
  with gr.Row():
79
  with gr.Column(scale=1):
80
  text1 = gr.Textbox(label="Question", value="이 대화 전반에 걸쳐 아이언맨의 토니 스타크 페르소나를 구현해 보세요. \"아이언맨으로서\"와 같은 문구로 시작하지 마세요. 첫 번째 질문은 다음과 같습니다. 아이언맨이 되어 가장 좋은 부분은 무엇입니까?")
 
38
 
39
 
40
  def main():
41
+ description_text1 = """
42
+ </br><span style="font-size: 23px;">The keval model is an advanced evaluation model specifically designed to assess Korean language models using the LLM-as-a-judge approach. Unlike traditional evaluation methods that relied on ChatGPT, keval provides an independent and specialized alternative for evaluating Korean LLMs.</span></br>
43
+ <span style="font-size: 23px;">keval is based on the Gemma2-9B architecture and has been further optimized using Supervised Fine-Tuning (SFT) and Direct Preference Optimization (DPO). The model is trained on the newly developed Ko-Bench dataset, which is inspired by MT-Bench but adapted to capture Korean linguistic nuances more effectively.</span> </br></br>
44
+ """
45
+
46
+ description_text2 = """
47
+ </br><span style="font-size: 23px;">keval is trained and evaluated using the Ko-Bench dataset, a custom-built benchmark inspired by MT-Bench but specifically tailored for assessing Korean language models. The dataset includes diverse tasks covering a wide range of user scenarios, allowing for a thorough evaluation of: Multi-turn conversation ability, Instruction adherence, Contextual understanding.</span></br></br>
48
+ """
49
+
50
+ description_text3 = """
51
+ </br><span style="font-size: 23px;">keval currently has the following versions available: keval-2-1b, keval-2-3b.</span></br></br>
52
  """
53
 
54
  examples_list = [["최근 제주도 여행을 다녀오면서 꼭 가봐야 할 명소를 강조하는 재미있는 여행 블로그 글을 작성하시오.","## 제주도? 오늘부터 \"제주도 탐험가\"로 찍히는 법! 🏝️🔥\n\n제주도, 그 자체로도 멋진 이름이죠? 바다가 그려낸 섬, 하늘이 빚어낸 자연, 그리고 인간의 손길이 만든 매력까지! 제주 탐험가가 되기 위해 꼭 가봐야 할 명소들, 뭐가 있을까요? 여기 딱히 잡채처럼 기대되는 순간이 있지 않나요? 그 순간을 만드는 곳들이 바로 이곳이라면, 뭐가 들었나 보세요!\n\n**1. 유명한 곳도, 숨겨진 보석도 탐험해봐야지!**\n\n제주도에서 빠질 수 없는 명소죠. 한라산의 신비한 자연 속에 숨겨진 숲길을 걸으며, 바다 너머의 푸른 끝을 엿볼 수 있는 한라산 등반은 잊지 못할 경험을 선사할 거예요. 하지만 이곳의 매력은 그 자체로 끝나지 않아요. 숨겨진 보석 같은 곳들을 찾아내는 것이 제주 탐험가의 재미죠! 예를 들어, 오름 중심부에 자리 잡은 천칭암은 바다가 빚어낸 예술 작품처럼 신비롭고, 지리산 주변의 옛 민속마을은 시간이 멈춘 듯한 순간을 선사할 거예요.\n\n**2. 맛있는 음식도 탐험의 일환이지!**\n\n제주도의 맛은 탐험가의 힘을 북돋아주죠! 싱싱한 해산물로 만든 회와 멍게, 그리고 맛있는 한식과 함께, 제주도의 특색을 담은 음식들을 맛보세요. 특히, 한라산 근처의 숨겨진 한식당에서는 맛집들이 모여 있어요. 이곳에서 얻는 맛의 즐거움은 탐험의 피로를 씻어내는 최고의 보상이 될 거예요.\n\n**3. 자연과 함께하는 액티비티는 필수!**\n\n제주도의 자연은 탐험가를 위한 무한한 놀이터죠! 서핑, 스노클링, 하이킹 등 다양한 액티비티를 통해 자연과의 교감을 경험해보세요. 예를 들어, 제주 해안선을 따라 달리는 바다 위 자전거 타기나, 숨겨진 해변에서의 캠핑은 잊지 못할 추억을 선사할 거예요.\n\n**4. 제주만의 문화와 예술도 탐험해보세요!**\n\n제주도는 단순한 자연의 아름다움만이 아닌, 독특한 문화와 예술로도 유명해요. 
예를 들어, 제주 민속촌이나 오름 박물관은 제주의 역사와 문화를 엿볼 수 있는 곳이며, 제주 특유의 공예품을 구경하며 즐거운 시간을 보낼 수 있어요.\n\n이제 제주도 탐험가가 되는 준비가 끝났으니, 떠나 보세요! 제주도의 매력을 느끼며, 자신만의 특별한 여행 이야기를 만들어보세요. 그리고 잊지 마세요, 제주는 당신이 다시 한번 돌아올 만한 곳이라는 사실을! 😉",None, None, None, None, None],
 
57
  ]
58
 
59
  with gr.Blocks() as app:
60
+ gr.Markdown("# 🤖 Evaluating LLM Responses with keval")
61
+ gr.Markdown("## What Is keval?")
62
+ gr.Markdown(description_text1)
63
+ gr.Markdown("## Benchmark and Dataset for keval")
64
+ gr.Markdown(description_text2)
65
+ gr.Markdown("## Available keval Models")
66
+ gr.Markdown(description_text3)
67
 
68
  gr.HTML("""<style>
69
  #submit-btn {
 
80
  </style>
81
  """)
82
 
83
+ gr.Markdown("## 🖱️ Try it out!")
84
  with gr.Row():
85
  with gr.Column(scale=1):
86
  text1 = gr.Textbox(label="Question", value="이 대화 전반에 걸쳐 아이언맨의 토니 스타크 페르소나를 구현해 보세요. \"아이언맨으로서\"와 같은 문구로 시작하지 마세요. 첫 번째 질문은 다음과 같습니다. 아이언맨이 되어 가장 좋은 부분은 무엇입니까?")