Update app.py
app.py
CHANGED
@@ -38,17 +38,14 @@ def respond(
 
 
 def main():
-    description_text1 = """
-</br><span style="font-size: 23px;">The keval model is an advanced evaluation model specifically designed to assess Korean language models using the LLM-as-a-judge approach. Unlike traditional evaluation methods that relied on ChatGPT, keval provides an independent and specialized alternative for evaluating Korean LLMs.</span></br>
+    description_text1 = """<span style="font-size: 23px;">The keval model is an advanced evaluation model specifically designed to assess Korean language models using the LLM-as-a-judge approach. Unlike traditional evaluation methods that relied on ChatGPT, keval provides an independent and specialized alternative for evaluating Korean LLMs.</span></br>
 <span style="font-size: 23px;">keval is based on the Gemma2-9B architecture and has been further optimized using Supervised Fine-Tuning (SFT) and Direct Policy Optimization (DPO). The model is trained on the newly developed Ko-Bench dataset, which is inspired by MT-Bench but adapted to capture Korean linguistic nuances more effectively.</span> </br></br>
 """
 
-    description_text2 = """
-</br><span style="font-size: 23px;">keval is trained and evaluated using the Ko-Bench dataset, a custom-built benchmark inspired by MT-Bench but specifically tailored for assessing Korean language models. The dataset includes diverse tasks covering a wide range of user scenarios, allowing for a thorough evaluation of: Multi-turn conversation ability, Instruction adherence, Contextual understanding.</span></br></br>
+    description_text2 = """<span style="font-size: 23px;">keval is trained and evaluated using the Ko-Bench dataset, a custom-built benchmark inspired by MT-Bench but specifically tailored for assessing Korean language models. The dataset includes diverse tasks covering a wide range of user scenarios, allowing for a thorough evaluation of: Multi-turn conversation ability, Instruction adherence, Contextual understanding.</span></br></br>
 """
 
-    description_text3 = """
-</br><span style="font-size: 23px;">keval currently has the following versions available: keval-2-1b, keval-2-3b.</span></br></br>
+    description_text3 = """<span style="font-size: 23px;">keval currently has the following versions available – keval-2-1b, keval-2-3b.</span></br></br>
 """
 
     examples_list = [["최근 제주도 여행을 다녀오면서 꼭 가봐야 할 명소를 강조하는 재미있는 여행 블로그 글을 작성하시오.", "<long Korean sample answer: a playful blog post recommending Jeju Island's famous and hidden spots, local food, outdoor activities, and culture; not reproduced here because the source text is encoding-damaged>", None, None, None, None, None],
@@ -58,6 +55,9 @@ def main():
 
     with gr.Blocks() as app:
         gr.Markdown("# 🤗 Evaluating LLM Responses with keval")
+        gr.Markdown("")
+        gr.Markdown("## | [Model](https://huggingface.co/collections/davidkim205/keval-2-67ac5400f5eef4984cc5dbbb) | [Paper](https://davidkim205.github.io/keval.html) | [Code](https://github.com/davidkim205/simple-keval) |")
+        gr.Markdown("")
         gr.Markdown("## What Is keval?")
         gr.Markdown(description_text1)
         gr.Markdown("## Benchmark and Dataset for keval")
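The net effect of this commit is a new header row under the app title linking the model collection, the paper page, and the simple-keval code repository. Below is a minimal Gradio sketch that reproduces just that header block so the rendered result is easy to picture; the link targets and Markdown strings follow the diff, while the `demo` wrapper and `launch()` call are illustrative scaffolding that is not part of this commit.

```python
import gradio as gr

# Sketch of the header block added in this commit: the title row plus the
# new Model / Paper / Code link row. The rest of the Space's evaluation UI
# is omitted.
with gr.Blocks() as demo:
    gr.Markdown("# 🤗 Evaluating LLM Responses with keval")
    gr.Markdown("")  # spacer row, as in the committed code
    gr.Markdown(
        "## | [Model](https://huggingface.co/collections/davidkim205/keval-2-67ac5400f5eef4984cc5dbbb) "
        "| [Paper](https://davidkim205.github.io/keval.html) "
        "| [Code](https://github.com/davidkim205/simple-keval) |"
    )
    gr.Markdown("")  # spacer row

if __name__ == "__main__":
    demo.launch()
```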
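The description strings above present keval as an LLM-as-a-judge model, with keval-2-1b and keval-2-3b listed as available versions. For readers who want to try it outside this Space, here is a hedged sketch of loading one checkpoint with transformers and asking it to score a Ko-Bench-style answer (the example question from the diff asks for a fun travel blog post highlighting must-see spots on Jeju Island). The repository id davidkim205/keval-2-1b and the judging prompt are assumptions made for illustration; the authoritative usage lives in the linked simple-keval repository and in this Space's own respond() function.

```python
# Hedged sketch: using a keval-2 checkpoint as an LLM-as-a-judge with transformers.
# The repository id "davidkim205/keval-2-1b" is assumed from the version list above
# and the Model collection link; the judging prompt is illustrative only, not the
# official Ko-Bench template used by this Space.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "davidkim205/keval-2-1b"  # assumed checkpoint name, see the Model link
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# Ko-Bench-style question taken from the examples_list in the diff
# ("write a fun travel blog post highlighting must-see spots on Jeju Island").
question = "최근 제주도 여행을 다녀오면서 꼭 가봐야 할 명소를 강조하는 재미있는 여행 블로그 글을 작성하시오."
answer = "..."  # the candidate model's response to be scored goes here

# Illustrative judge prompt: ask for a 1-10 score with a short justification.
prompt = (
    "다음 질문에 대한 답변을 1점부터 10점까지 평가하고 그 이유를 설명하세요.\n\n"
    f"[질문]\n{question}\n\n[답변]\n{answer}\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```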