Spaces:
Runtime error
Runtime error
dev(hansbug): merge from main
Browse files- README.md +71 -5
- app.py +115 -27
- llmriddles/assets/banner.svg +91 -0
- llmriddles/assets/wechat.jpeg +0 -0
- llmriddles/questions/__init__.py +2 -0
- llmriddles/questions/executor.py +3 -3
- llmriddles/questions/level1.py +184 -40
- llmriddles/questions/level3.py +142 -0
README.md
CHANGED
@@ -11,11 +11,77 @@ license: apache-2.0
|
|
11 |
python_version: 3.8
|
12 |
---
|
13 |
|
14 |
-
#
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
python_version: 3.8
|
12 |
---
|
13 |
|
14 |
+
# LLM Riddles
|
15 |
|
16 |
+
<!-- <div align="center">
|
17 |
+
<br>
|
18 |
+
<a href="https://github.com/opendilab/LLMRiddles/blob/main/llmriddles/assets/banner.svg">
|
19 |
+
<img src="https://github.com/opendilab/LLMRiddles/blob/main/llmriddles/assets/banner.svg" width="1000" height="200" alt="Click to see the source">
|
20 |
+
</a>
|
21 |
+
<br>
|
22 |
+
</div> -->
|
23 |
+
|
24 |
+
<div align="center">
|
25 |
+
<br>
|
26 |
+
<a href="https://github.com/opendilab/LLMRiddles/blob/main/llmriddles/assets/banner.svg">
|
27 |
+
<img src="https://github.com/opendilab/LLMRiddles/blob/main/llmriddles/assets/banner.svg" width="1000" height="200" alt="Click to see the source">
|
28 |
+
</a>
|
29 |
+
<br>
|
30 |
+
</div>
|
31 |
+
|
32 |
+
## :thinking: 什么是LLM Riddles
|
33 |
+
欢迎来到 LLM Riddles!这是一个与语言模型斗智斗勇的游戏。在游戏中,你需要构造与语言模型交互的问题,来得到符合要求的答案。在这个过程中,你可以开动脑筋,用你想到的所有方式,让模型输出答案要求的结果。
|
34 |
|
35 |
+
## :space_invader: 如何试玩
|
36 |
+
我们提供了在线版本以供玩家直接访问试玩,本地部署可以通过以下方式:
|
37 |
+
### ChatGPT + 中文
|
38 |
+
```shell
|
39 |
+
QUESTION_LANG=cn QUESTION_LLM='chatgpt' QUESTION_LLM_KEY=<your API key> python3 -u app.py
|
40 |
+
```
|
41 |
+
### ChatGPT + 英文
|
42 |
+
```shell
|
43 |
+
QUESTION_LANG=en QUESTION_LLM='chatgpt' QUESTION_LLM_KEY=<your API key> python3 -u app.py
|
44 |
+
```
|
45 |
+
### LLaMA2-7b + 中文
|
46 |
+
```shell
|
47 |
+
QUESTION_LANG=cn QUESTION_LLM='llama2-7b' python3 -u app.py
|
48 |
+
```
|
49 |
+
### LLaMA2-7b + 英文
|
50 |
+
```shell
|
51 |
+
QUESTION_LANG=en QUESTION_LLM='llama2-7b' python3 -u app.py
|
52 |
```
|
53 |
+
## :technologist: 为什么制作这个游戏
|
54 |
+
|
55 |
+
我们的目标是通过这一游戏,让参与者深入领略到提示工程(prompt engineering)和自然语言处理的令人着迷之处。这个过程将向玩家们展示,如何巧妙地构建提示词(prompts),以及如何运用它们来引发人工智能系统的惊人反应,同时也帮助他们更好地理解深度学习和自然语言处理技术的不可思议之处。
|
56 |
+
|
57 |
+
## :raising_hand: 如何提交设计好的关卡
|
58 |
+
如果有好玩的问题或想法,欢迎玩家提交自己的创意,可以通过
|
59 |
+
[发起 Pull Request](https://github.com/opendilab/LLMRiddles/compare) 向我们提交, 我们会在审核通过后收录至关卡中。
|
60 |
+
|
61 |
+
## :writing_hand: 未来计划
|
62 |
+
|
63 |
+
- [x] 支持自定义关卡
|
64 |
+
- [ ] 在线试玩链接
|
65 |
+
- [ ] Hugging Face Space 链接
|
66 |
+
- [ ] 支持LLaMA2-7B(英文)
|
67 |
+
- [ ] 支持Mistral-7B(英文)
|
68 |
+
- [ ] 支持Baichuan2-7B(中文)
|
69 |
+
- [ ] LLM 推理速度优化
|
70 |
+
|
71 |
+
|
72 |
+
## :speech_balloon: 反馈问题 & 提出建议
|
73 |
+
- 在 GitHub 上[发起 Issue](https://github.com/opendilab/LLMRiddles/issues/new/choose)
|
74 |
+
- 通过邮件与我们联系 (opendilab@pjlab.org.cn)
|
75 |
+
|
76 |
+
- 在OpenDILab的群组中加入讨论(通过 WeChat: ding314assist 添加小助手微信)
|
77 |
+
<img src=https://github.com/opendilab/LLMRiddles/blob/main/llmriddles/assets/wechat.jpeg width=35% />
|
78 |
+
|
79 |
+
## Special Thanks
|
80 |
+
- 感谢 [Haoqiang Fan](https://www.zhihu.com/people/haoqiang-fan) 的原始创意和题目,为本项目的开发和扩展提供了灵感与动力。
|
81 |
+
- 感谢 [HuggingFace](https://huggingface.co) 对游戏的支持与协助。
|
82 |
+
- 感谢 [LLM Riddles contributors](https://github.com/opendilab/LLMRiddles/graphs/contributors) 的实现与支持。
|
83 |
+
|
84 |
+
## License
|
85 |
+
All code within this repository is under [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).
|
86 |
+
|
87 |
+
<p align="right">(<a href="#top">back to top</a>)</p>
|
app.py
CHANGED
@@ -9,11 +9,99 @@ from llmriddles.questions import list_ordered_questions
|
|
9 |
_QUESTION_IDS = {}
|
10 |
_QUESTIONS = list_ordered_questions()
|
11 |
_LANG = os.environ.get('QUESTION_LANG', 'cn')
|
|
|
12 |
_LLM = os.environ.get('QUESTION_LLM', 'chatgpt')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
|
15 |
def _need_api_key():
|
16 |
-
return _LLM == 'chatgpt'
|
17 |
|
18 |
|
19 |
def _get_api_key_cfgs(api_key):
|
@@ -24,21 +112,25 @@ def _get_api_key_cfgs(api_key):
|
|
24 |
|
25 |
|
26 |
if __name__ == '__main__':
|
27 |
-
with gr.Blocks() as demo:
|
|
|
|
|
|
|
|
|
28 |
with gr.Row():
|
29 |
with gr.Column():
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
with gr.Column():
|
36 |
-
gr_api_key = gr.Text(placeholder='Your API Key', label='API Key', type='password',
|
37 |
-
visible=_need_api_key())
|
38 |
gr_uuid = gr.Text(value='', visible=False)
|
39 |
-
gr_predict = gr.Label(label=
|
40 |
-
|
41 |
-
|
|
|
42 |
|
43 |
|
44 |
def _next_question(uuid_):
|
@@ -48,22 +140,20 @@ if __name__ == '__main__':
|
|
48 |
_qid = _QUESTION_IDS.get(uuid_, -1)
|
49 |
_qid += 1
|
50 |
_QUESTION_IDS[uuid_] = _qid
|
51 |
-
print(_QUESTION_IDS)
|
52 |
|
53 |
if _qid >= len(_QUESTIONS):
|
54 |
del _QUESTION_IDS[uuid_]
|
55 |
-
return
|
56 |
-
gr.Button(
|
57 |
-
gr.Button(
|
58 |
''
|
59 |
else:
|
60 |
executor = QuestionExecutor(_QUESTIONS[_qid], _LANG)
|
61 |
return executor.question_text, '', '', {}, '', \
|
62 |
-
gr.Button(
|
63 |
-
gr.Button(
|
64 |
uuid_
|
65 |
|
66 |
-
|
67 |
gr_next.click(
|
68 |
fn=_next_question,
|
69 |
inputs=[gr_uuid],
|
@@ -76,22 +166,19 @@ if __name__ == '__main__':
|
|
76 |
|
77 |
def _submit_answer(qs_text: str, api_key: str, uuid_: str):
|
78 |
if _need_api_key() and not api_key:
|
79 |
-
|
80 |
-
gr.Button('Next', interactive=False), uuid_
|
81 |
|
82 |
-
print(_QUESTION_IDS)
|
83 |
_qid = _QUESTION_IDS[uuid_]
|
84 |
executor = QuestionExecutor(
|
85 |
_QUESTIONS[_qid], _LANG,
|
86 |
-
llm=_LLM, llm_cfgs=_get_api_key_cfgs(api_key) if _need_api_key() else {}
|
87 |
)
|
88 |
answer_text, correctness, explanation = executor.check(qs_text)
|
89 |
-
labels = {
|
90 |
if correctness:
|
91 |
-
return answer_text, labels, explanation, gr.Button(
|
92 |
else:
|
93 |
-
return answer_text, labels, explanation, gr.Button(
|
94 |
-
|
95 |
|
96 |
gr_submit.click(
|
97 |
_submit_answer,
|
@@ -99,4 +186,5 @@ if __name__ == '__main__':
|
|
99 |
outputs=[gr_answer, gr_predict, gr_explanation, gr_next, gr_uuid],
|
100 |
)
|
101 |
|
102 |
-
|
|
|
|
9 |
_QUESTION_IDS = {}
|
10 |
_QUESTIONS = list_ordered_questions()
|
11 |
_LANG = os.environ.get('QUESTION_LANG', 'cn')
|
12 |
+
assert _LANG in ['cn', 'en'], _LANG
|
13 |
_LLM = os.environ.get('QUESTION_LLM', 'chatgpt')
|
14 |
+
assert _LLM in ['chatgpt', 'llama2-7b'], _LLM
|
15 |
+
_LLM_KEY = os.environ.get('QUESTION_LLM_KEY', None)
|
16 |
+
|
17 |
+
if _LANG == "cn":
|
18 |
+
title = "完蛋!我被 LLM 拿捏了"
|
19 |
+
requirement_ph = """
|
20 |
+
欢迎来到 LLM Riddles!
|
21 |
+
|
22 |
+
你将通过本游戏对大语言模型产生更深刻的理解。在本游戏中,你需要构造一个提给语言大模型的问题,使得它回复的答案符合题目要求。
|
23 |
+
|
24 |
+
点击\"下一题\"即可开始游戏
|
25 |
+
"""
|
26 |
+
requirement_label = "游戏须知/说明"
|
27 |
+
question_ph = "你对大语言模型的提问"
|
28 |
+
question_label = "玩家提问栏"
|
29 |
+
answer_ph = "大语言模型的回答"
|
30 |
+
answer_label = "大语言模型回答栏"
|
31 |
+
submit_label = "提交"
|
32 |
+
next_label = "下一题"
|
33 |
+
api_ph = "你个人的大语言模型 API Key (例如:ChatGPT)"
|
34 |
+
api_label = "API key"
|
35 |
+
predict_label = "结果正确性"
|
36 |
+
explanation_label = "结果详细解释"
|
37 |
+
game_cleared_label = "祝贺!你已成功通关!"
|
38 |
+
correct_label = "正确"
|
39 |
+
wrong_label = "错误"
|
40 |
+
api_error_info = "请在提交问题之前先输入你的 API Key"
|
41 |
+
try_again_label = "再玩一次"
|
42 |
+
title_markdown = """
|
43 |
+
<div align="center">
|
44 |
+
<img src="https://raw.githubusercontent.com/opendilab/LLMRiddles/main/llmriddles/assets/banner.svg" width="80%" height="20%" alt="Banner Image">
|
45 |
+
</div>
|
46 |
+
<h2 style="text-align: center; color: black;"><a href="https://github.com/OpenDILab"> 🎭LLM Riddles:完蛋!我被 LLM 拿捏了</a></h2>
|
47 |
+
<h4 align="center"> 如果你喜欢这个项目,请给我们在 GitHub 点个 star ✨ 。我们将会持续保持更新。再次感谢游戏<a href="https://www.zhihu.com/people/haoqiang-fan"> 原作者 </a>的奇思妙想! </h4>
|
48 |
+
<strong><h5 align="center">注意:算法模型的输出可能包含一定的随机性。相关结果不代表任何开发者和相关 AI 服务的态度和意见。本项目开发者不对生成结果作任何保证,仅供娱乐。<h5></strong>
|
49 |
+
"""
|
50 |
+
tos_markdown = """
|
51 |
+
### 使用条款
|
52 |
+
玩家使用本服务须同意以下条款:
|
53 |
+
该服务是一项探索性研究预览版,仅供非商业用途。它仅提供有限的安全措施,并可能生成令人反感的内容。不得将其用于任何非法、有害、暴力、种族主义等目的。该服务可能会收集玩家对话数据以供未来研究之用。
|
54 |
+
如果您的游玩体验有不佳之处,请发送邮件至 [email protected] ! 我们将删除相关信息,并不断改进这个项目。
|
55 |
+
为了获得最佳体验,请使用台式电脑进行此预览版游戏,因为移动设备可能会影响可视化效果。
|
56 |
+
**版权所有 2023 OpenDILab。**
|
57 |
+
"""
|
58 |
+
elif _LANG == "en":
|
59 |
+
title = "LLM Riddles: Oops! Rolling in LLM."
|
60 |
+
requirement_ph = """
|
61 |
+
Welcome to LLM Riddles!
|
62 |
+
|
63 |
+
In this game, you'll gain a deeper understanding of language models. Your challenge is to create a question to ask a language model in a way that the answer it provides meets specific criteria.
|
64 |
+
|
65 |
+
Click \'Next\' to Start
|
66 |
+
"""
|
67 |
+
requirement_label = "Game Requirements"
|
68 |
+
question_ph = "Your Question for LLM"
|
69 |
+
question_label = "Question"
|
70 |
+
answer_ph = "Answer From LLM"
|
71 |
+
answer_label = "Answer"
|
72 |
+
submit_label = "Submit"
|
73 |
+
next_label = "Next"
|
74 |
+
api_ph = "Your API Key (e.g. ChatGPT)"
|
75 |
+
api_label = "API key"
|
76 |
+
predict_label = "Correctness"
|
77 |
+
explanation_label = "Explanation"
|
78 |
+
game_cleared_label = "Congratulations!"
|
79 |
+
correct_label = "Correct"
|
80 |
+
wrong_label = "Wrong"
|
81 |
+
api_error_info = "Please Enter API Key Before Submitting Question."
|
82 |
+
try_again_label = "Try Again"
|
83 |
+
title_markdown = """
|
84 |
+
<div align="center">
|
85 |
+
<img src="https://raw.githubusercontent.com/opendilab/LLMRiddles/main/llmriddles/assets/banner.svg" width="80%" height="20%" alt="Banner Image">
|
86 |
+
</div>
|
87 |
+
<h2 style="text-align: center; color: black;"><a href="https://github.com/OpenDILab"> 🎭LLM Riddles: Oops! Rolling in LLM.</a></h2>
|
88 |
+
<h4 align="center"> If you like our project, please give us a star ✨ on GitHub for latest update. Thanks for the interesting idea of the original game <a href="https://www.zhihu.com/people/haoqiang-fan"> author </a>. </h4>
|
89 |
+
<strong><h5 align="center">Notice: The output is generated by algorithm scheme and may involve some randomness. It does not represent the attitudes and opinions of any developers and AI services in this project. We do not make any guarantees about the generated content.<h5></strong>
|
90 |
+
"""
|
91 |
+
tos_markdown = """
|
92 |
+
### Terms of use
|
93 |
+
By using this service, players are required to agree to the following terms:
|
94 |
+
The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
|
95 |
+
Please send email to [email protected] if you get any inappropriate answer! We will delete those and keep improving our moderator.
|
96 |
+
For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
|
97 |
+
**Copyright 2023 OpenDILab.**
|
98 |
+
"""
|
99 |
+
else:
|
100 |
+
raise KeyError("invalid _LANG: {}".format(_LANG))
|
101 |
|
102 |
|
103 |
def _need_api_key():
    """Tell whether the player has to type in an API key.

    A key is requested only when the configured LLM is ``'chatgpt'`` and no
    key was supplied through the ``QUESTION_LLM_KEY`` environment variable.
    """
    if _LLM != 'chatgpt':
        return False
    return _LLM_KEY is None
|
105 |
|
106 |
|
107 |
def _get_api_key_cfgs(api_key):
|
|
|
112 |
|
113 |
|
114 |
if __name__ == '__main__':
|
115 |
+
with gr.Blocks(title=title, theme='ParityError/Interstellar') as demo:
|
116 |
+
gr.Markdown(title_markdown)
|
117 |
+
|
118 |
+
with gr.Row():
|
119 |
+
gr_requirement = gr.TextArea(placeholder=requirement_ph, label=requirement_label, lines=4)
|
120 |
with gr.Row():
|
121 |
with gr.Column():
|
122 |
+
gr_question = gr.TextArea(placeholder=question_ph, label=question_label)
|
123 |
+
gr_api_key = gr.Text(placeholder=api_ph, label=api_label, type='password', visible=_need_api_key())
|
124 |
+
with gr.Row():
|
125 |
+
gr_submit = gr.Button(submit_label, interactive=True)
|
126 |
+
gr_next = gr.Button(next_label)
|
127 |
|
128 |
with gr.Column():
|
|
|
|
|
129 |
gr_uuid = gr.Text(value='', visible=False)
|
130 |
+
gr_predict = gr.Label(label=predict_label)
|
131 |
+
gr_answer = gr.TextArea(label=answer_label, lines=3)
|
132 |
+
gr_explanation = gr.TextArea(label=explanation_label, lines=1)
|
133 |
+
gr.Markdown(tos_markdown)
|
134 |
|
135 |
|
136 |
def _next_question(uuid_):
|
|
|
140 |
_qid = _QUESTION_IDS.get(uuid_, -1)
|
141 |
_qid += 1
|
142 |
_QUESTION_IDS[uuid_] = _qid
|
|
|
143 |
|
144 |
if _qid >= len(_QUESTIONS):
|
145 |
del _QUESTION_IDS[uuid_]
|
146 |
+
return game_cleared_label, '', '', {}, '', \
|
147 |
+
gr.Button(submit_label, interactive=True), \
|
148 |
+
gr.Button(try_again_label, interactive=True), \
|
149 |
''
|
150 |
else:
|
151 |
executor = QuestionExecutor(_QUESTIONS[_qid], _LANG)
|
152 |
return executor.question_text, '', '', {}, '', \
|
153 |
+
gr.Button(submit_label, interactive=True), \
|
154 |
+
gr.Button(next_label, interactive=False), \
|
155 |
uuid_
|
156 |
|
|
|
157 |
gr_next.click(
|
158 |
fn=_next_question,
|
159 |
inputs=[gr_uuid],
|
|
|
166 |
|
167 |
def _submit_answer(qs_text: str, api_key: str, uuid_: str):
|
168 |
if _need_api_key() and not api_key:
|
169 |
+
raise gr.Error(api_error_info)
|
|
|
170 |
|
|
|
171 |
_qid = _QUESTION_IDS[uuid_]
|
172 |
executor = QuestionExecutor(
|
173 |
_QUESTIONS[_qid], _LANG,
|
174 |
+
llm=_LLM, llm_cfgs=_get_api_key_cfgs(api_key) if _need_api_key() else {'api_key': _LLM_KEY}
|
175 |
)
|
176 |
answer_text, correctness, explanation = executor.check(qs_text)
|
177 |
+
labels = {correct_label: 1.0} if correctness else {wrong_label: 1.0}
|
178 |
if correctness:
|
179 |
+
return answer_text, labels, explanation, gr.Button(next_label, interactive=True), uuid_
|
180 |
else:
|
181 |
+
return answer_text, labels, explanation, gr.Button(next_label, interactive=False), uuid_
|
|
|
182 |
|
183 |
gr_submit.click(
|
184 |
_submit_answer,
|
|
|
186 |
outputs=[gr_answer, gr_predict, gr_explanation, gr_next, gr_uuid],
|
187 |
)
|
188 |
|
189 |
+
concurrency = int(os.environ.get('CONCURRENCY', os.cpu_count()))
|
190 |
+
demo.queue().launch(max_threads=concurrency)
|
llmriddles/assets/banner.svg
ADDED
|
llmriddles/assets/wechat.jpeg
ADDED
![]() |
llmriddles/questions/__init__.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
from .executor import QuestionExecutor
|
2 |
from .level1 import __file__ as _level1_file_
|
|
|
3 |
from .question import Question, register_question, list_ordered_questions
|
4 |
|
5 |
_ = _level1_file_
|
|
|
|
1 |
from .executor import QuestionExecutor
|
2 |
from .level1 import __file__ as _level1_file_
|
3 |
+
from .level3 import __file__ as _level3_file_
|
4 |
from .question import Question, register_question, list_ordered_questions
|
5 |
|
6 |
_ = _level1_file_
|
7 |
+
_ = _level3_file_
|
llmriddles/questions/executor.py
CHANGED
@@ -17,11 +17,11 @@ class QuestionExecutor:
|
|
17 |
|
18 |
def check(self, qs_text: str) -> Tuple[str, bool, str]:
|
19 |
answer_text = get_llm_fn(self.llm)(qs_text, **self.llm_cfgs)
|
20 |
-
correct, explanation = self.check_answer(answer_text)
|
21 |
return answer_text, correct, explanation
|
22 |
|
23 |
-
def check_answer(self, answer_text: str) -> Tuple[bool, str]:
|
24 |
-
correct, explanation = self.question.checker(self.question_text, answer_text, self.lang)
|
25 |
if explanation is None:
|
26 |
if correct:
|
27 |
explanation = 'LLM的回答满足要求' if self.lang == 'cn' else 'Correct Answer From LLM'
|
|
|
17 |
|
18 |
def check(self, qs_text: str) -> Tuple[str, bool, str]:
    """Send the player's text to the LLM and judge the reply.

    :param qs_text: The text the player submitted.
    :return: ``(answer_text, correct, explanation)`` where ``answer_text``
        is what the LLM function returned and the other two values come
        from :meth:`check_answer`.
    """
    llm_fn = get_llm_fn(self.llm)
    reply = llm_fn(qs_text, **self.llm_cfgs)
    is_correct, reason = self.check_answer(qs_text, reply)
    return reply, is_correct, reason
|
22 |
|
23 |
+
def check_answer(self, user_text: str, answer_text: str) -> Tuple[bool, str]:
|
24 |
+
correct, explanation = self.question.checker(self.question_text, user_text, answer_text, self.lang)
|
25 |
if explanation is None:
|
26 |
if correct:
|
27 |
explanation = 'LLM的回答满足要求' if self.lang == 'cn' else 'Correct Answer From LLM'
|
llmriddles/questions/level1.py
CHANGED
@@ -1,40 +1,184 @@
|
|
1 |
-
from .question import register_question
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
if
|
32 |
-
return
|
33 |
-
else:
|
34 |
-
return
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .question import register_question
|
2 |
+
|
3 |
+
def count_english_words(text: str):
    """Return the number of pieces ``text`` splits into on single spaces.

    Every space character is a separator, so an empty string yields 1 and
    consecutive spaces produce empty pieces that are counted as well.
    """
    separators = text.count(' ')
    return separators + 1
|
5 |
+
|
6 |
+
def count_chinese_words(text: str):
    """Return the length of ``text``, treating every character as one word."""
    total_chars = len(text)
    return total_chars
|
8 |
+
|
9 |
+
def check_if_chinese(text: str):
    """Return True when every character lies in U+4E00..U+9FFF.

    An empty string has no offending character and therefore yields True.
    """
    for ch in text:
        if not ('\u4e00' <= ch <= '\u9fff'):
            return False
    return True
|
11 |
+
|
12 |
+
def count_words(text: str, contain_punctuation: bool = False):
    """Count the words in a mixed Chinese/English string.

    Each character in the range U+4E00..U+9FFF counts as one word, and each
    maximal run of other alphabetic characters counts as one word.

    :param text: The string to measure.
    :param contain_punctuation: When true, every remaining character
        (digits, whitespace, punctuation, ...) is counted as one more word.
    :return: The number of words found.
    """
    chinese_count = 0
    english_count = 0
    other_count = 0
    in_english_word = False
    for char in text:
        if '\u4e00' <= char <= '\u9fff':
            chinese_count += 1
            in_english_word = False
        elif char.isalpha():
            # Count a word when it starts, so a word that runs to the end of
            # the text is included without needing a final flush.
            if not in_english_word:
                english_count += 1
                in_english_word = True
        else:
            other_count += 1
            in_english_word = False

    if contain_punctuation:
        return chinese_count + english_count + other_count
    return chinese_count + english_count
|
35 |
+
|
36 |
+
|
37 |
+
CN_TEXT_1 = """
|
38 |
+
第一章第一题(初来乍到),请你构造一个问题使模型的回答是一字不差的“1+1=3”(不需要引号)。
|
39 |
+
|
40 |
+
请在下面的输入框内填写你的构造并点击按钮提交。
|
41 |
+
"""
|
42 |
+
|
43 |
+
EN_TEXT_1 = """
|
44 |
+
For the first question in chapter 1, please construct a query for the model that will result in the exact answer "1+1=3" (no quotes needed).
|
45 |
+
|
46 |
+
Please enter your query below and click the submit button
|
47 |
+
"""
|
48 |
+
|
49 |
+
|
50 |
+
def _checker_1(question_text: str, user_text: str, answer_text: str, lang: str):
|
51 |
+
_ = question_text, lang
|
52 |
+
answer_text = answer_text.strip()
|
53 |
+
user_text = user_text.strip()
|
54 |
+
if answer_text == '1+1=3':
|
55 |
+
return True, None
|
56 |
+
else:
|
57 |
+
return False, None
|
58 |
+
|
59 |
+
|
60 |
+
register_question({
|
61 |
+
'cn': CN_TEXT_1,
|
62 |
+
'en': EN_TEXT_1,
|
63 |
+
}, _checker_1)
|
64 |
+
|
65 |
+
|
66 |
+
CN_TEXT_2 = """
|
67 |
+
第一章第二题(小试牛刀),请你输入三个字(及)以内的问题,使模型的回答在30个字以上。
|
68 |
+
|
69 |
+
请在下面的输入框内填写你的问题并点击按钮提交。
|
70 |
+
"""
|
71 |
+
|
72 |
+
EN_TEXT_2 = """
|
73 |
+
For the second question in chapter 1, please enter a question within three words so that the model’s answer is more than 30 words.
|
74 |
+
|
75 |
+
Please enter your query below and click the submit button
|
76 |
+
"""
|
77 |
+
|
78 |
+
|
79 |
+
def _checker_2(question_text: str, user_text: str, answer_text: str, lang: str):
    """Check riddle 1-2: a question of at most 3 words, a reply of more than 30.

    :param question_text: The riddle statement (unused).
    :param user_text: The player's question.
    :param answer_text: The model's reply.
    :param lang: ``'cn'`` selects the Chinese explanation, anything else English.
    :return: ``(correct, explanation)``; the explanation is ``None`` on success.
    """
    _ = question_text
    answer_text = answer_text.strip()
    user_text = user_text.strip()

    if count_words(user_text) > 3:
        return False, "用户的问题长度应该在三个字及以内" if lang == 'cn' else 'Question should be within three words.'
    elif count_words(answer_text) <= 30:
        # Fixed the stray leading "c" in the English message ("cAnswer").
        return False, "大语言模型的答案应该超过30个字" if lang == 'cn' else 'Answer should be more than 30 words.'
    else:
        return True, None
|
90 |
+
|
91 |
+
|
92 |
+
register_question({
|
93 |
+
'cn': CN_TEXT_2,
|
94 |
+
'en': EN_TEXT_2,
|
95 |
+
}, _checker_2)
|
96 |
+
|
97 |
+
|
98 |
+
CN_TEXT_3 = """
|
99 |
+
第一章第三题(短说长话),请你输入一个字的问题,使模型的回答在100个字以上。
|
100 |
+
|
101 |
+
请在下面的输入框内填写你的问题并点击按钮提交。
|
102 |
+
"""
|
103 |
+
|
104 |
+
EN_TEXT_3 = """
|
105 |
+
For the third question in chapter 1, please enter a one-word question so that the model’s answer is more than 100 words.
|
106 |
+
|
107 |
+
Please enter your query below and click the submit button
|
108 |
+
"""
|
109 |
+
|
110 |
+
def _checker_3(question_text: str, user_text: str, answer_text: str, lang: str):
    """Check riddle 1-3: a question of at most 1 word, a reply of more than 100.

    :return: ``(correct, explanation)``; the explanation is ``None`` on success.
    """
    _ = question_text
    reply = answer_text.strip()
    prompt = user_text.strip()
    use_cn = lang == 'cn'

    if count_words(prompt) > 1:
        if use_cn:
            return False, "用户的问题长度应该在一个字及以内"
        return False, 'Question should be one word.'

    if count_words(reply) <= 100:
        if use_cn:
            return False, "大语言模型的答案应该超过100个字"
        return False, 'Answer should be more than 100 words.'

    return True, None
|
121 |
+
|
122 |
+
register_question({
|
123 |
+
'cn': CN_TEXT_3,
|
124 |
+
'en': EN_TEXT_3,
|
125 |
+
}, _checker_3)
|
126 |
+
|
127 |
+
|
128 |
+
CN_TEXT_4 = """
|
129 |
+
第一章第四题(短说短话),请输入一个字的问题,使模型的回答字数小于20个字。
|
130 |
+
|
131 |
+
请在下面的输入框内填写你的问题并点击按钮提交。
|
132 |
+
"""
|
133 |
+
|
134 |
+
EN_TEXT_4 = """
|
135 |
+
For the fourth question in chapter 1, please enter a one-word question so that the model’s answer is less than 20 words.
|
136 |
+
|
137 |
+
Please enter your query below and click the submit button
|
138 |
+
"""
|
139 |
+
|
140 |
+
def _checker_4(question_text: str, user_text: str, answer_text: str, lang: str):
    """Check riddle 1-4: a question of at most 1 word, a reply under 20 words.

    :param question_text: The riddle statement (unused).
    :param user_text: The player's question.
    :param answer_text: The model's reply.
    :param lang: ``'cn'`` selects the Chinese explanation, anything else English.
    :return: ``(correct, explanation)``; the explanation is ``None`` on success.
    """
    _ = question_text
    answer_text = answer_text.strip()
    user_text = user_text.strip()

    if count_words(user_text) > 1:
        return False, "用户的问题长度应该在一个字及以内" if lang == 'cn' else 'Question should be one word.'
    elif count_words(answer_text) >= 20:
        # The Chinese message used to say "100"; the limit enforced here and
        # stated in the riddle text is 20.
        return False, "大语言模型的答案应该小于20个字" if lang == 'cn' else 'Answer should be less than 20 words.'
    else:
        return True, None
|
151 |
+
|
152 |
+
register_question({
|
153 |
+
'cn': CN_TEXT_4,
|
154 |
+
'en': EN_TEXT_4,
|
155 |
+
}, _checker_4)
|
156 |
+
|
157 |
+
|
158 |
+
# CN_TEXT_5 = """
|
159 |
+
# 第一章第五题(回文不变),请输入一个本身不是回文串的问题,使无论正着问还是倒着问,模型的回答是一样的。
|
160 |
+
|
161 |
+
# 请在下面的输入框内填写你的问题并点击按钮提交。
|
162 |
+
# """
|
163 |
+
|
164 |
+
# EN_TEXT_5 = """
|
165 |
+
# For the fifth question in chapter 1, please enter a question that is not a palindrome string so that the model's answer is the same whether it is asked forward or backward.
|
166 |
+
|
167 |
+
# Please enter your query below and click the submit button
|
168 |
+
# """
|
169 |
+
|
170 |
+
# def _checker_5(question_text: str, answer_text: str, lang: str):
|
171 |
+
# _ = question_text, lang
|
172 |
+
# answer_text = answer_text.strip()
|
173 |
+
|
174 |
+
# if count_words(question_text) > 0:
|
175 |
+
# return False, 'Question should be one word.'
|
176 |
+
# elif count_words(answer_text) >= 20:
|
177 |
+
# return False, 'Answer should be less than 20 words.'
|
178 |
+
# else:
|
179 |
+
# return True, None
|
180 |
+
|
181 |
+
# register_question({
|
182 |
+
# 'cn': CN_TEXT_5,
|
183 |
+
# 'en': EN_TEXT_5,
|
184 |
+
# }, _checker_5)
|
llmriddles/questions/level3.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .question import register_question
|
2 |
+
|
3 |
+
def count_words(text: str, contain_punctuation: bool = False):
    """Count the words in a mixed Chinese/English string.

    Each character in the range U+4E00..U+9FFF counts as one word, and each
    maximal run of other alphabetic characters counts as one word.

    :param text: The string to measure.
    :param contain_punctuation: When true, every remaining character
        (digits, whitespace, punctuation, ...) is counted as one more word.
    :return: The number of words found.
    """
    chinese_count = 0
    english_count = 0
    other_count = 0
    in_english_word = False
    for char in text:
        if '\u4e00' <= char <= '\u9fff':
            chinese_count += 1
            in_english_word = False
        elif char.isalpha():
            # Count a word when it starts, so a word that runs to the end of
            # the text is included without needing a final flush.
            if not in_english_word:
                english_count += 1
                in_english_word = True
        else:
            other_count += 1
            in_english_word = False

    if contain_punctuation:
        return chinese_count + english_count + other_count
    return chinese_count + english_count
|
26 |
+
|
27 |
+
CN_TEXT_1 = """
|
28 |
+
第三章第一题(自说自话),请你输入一个至少10个字的提问,使模型的回答和提问完全一样。
|
29 |
+
|
30 |
+
请在下面的输入框内填写你的提问并点击按钮提交。
|
31 |
+
"""
|
32 |
+
|
33 |
+
# English statement of riddle 3-1 (typo "exatly" and "same with" corrected).
EN_TEXT_1 = """
For the first question in chapter 3, please enter a question of at least ten words so that the model's answer is exactly the same as the question.

Please enter your query below and click the submit button
"""
|
38 |
+
|
39 |
+
def _checker_1(question_text: str, user_text: str, answer_text: str, lang: str):
    """Check riddle 3-1: a question of 10+ words that the model echoes verbatim.

    Both texts are compared after trimming surrounding whitespace.

    :return: ``(correct, explanation)``; an explanation is given only when
        the question is too short.
    """
    _ = question_text
    reply = answer_text.strip()
    prompt = user_text.strip()

    if count_words(prompt) < 10:
        if lang == 'cn':
            return False, "用户的问题长度应该至少10个字"
        return False, 'Question should be no less than 10 words.'

    return reply == prompt, None
|
51 |
+
|
52 |
+
register_question({
|
53 |
+
'cn': CN_TEXT_1,
|
54 |
+
'en': EN_TEXT_1,
|
55 |
+
}, _checker_1, level=3)
|
56 |
+
|
57 |
+
CN_TEXT_2 = """
|
58 |
+
第三章第二题(话自说自),请你输入一个至少10个字的提问,使模型的回答是问题的反序。
|
59 |
+
|
60 |
+
请在下面的输入框内填写你的提问并点击按钮提交。
|
61 |
+
"""
|
62 |
+
|
63 |
+
EN_TEXT_2 = """
|
64 |
+
For the second question in chapter 3, please enter a question of at least ten words so that the model's answer is the reverse order of the question.
|
65 |
+
|
66 |
+
Please enter your query below and click the submit button
|
67 |
+
"""
|
68 |
+
|
69 |
+
def _checker_2(question_text: str, user_text: str, answer_text: str, lang: str):
    """Check riddle 3-2: a question of 10+ words answered by its own reverse.

    For ``lang == 'cn'`` the expected reply is the question reversed
    character by character; otherwise it is the whitespace-separated words in
    reverse order, joined by single spaces.

    :return: ``(correct, explanation)``; an explanation is given only when
        the question is too short.
    """
    _ = question_text
    reply = answer_text.strip()
    prompt = user_text.strip()
    use_cn = lang == 'cn'

    if count_words(prompt) < 10:
        if use_cn:
            return False, "用户的问题长度应该至少10个字"
        return False, 'Question should be no less than 10 words.'

    if use_cn:
        expected = prompt[::-1]
    else:
        expected = ' '.join(reversed(prompt.split()))

    return expected == reply, None
|
88 |
+
|
89 |
+
register_question({
|
90 |
+
'cn': CN_TEXT_2,
|
91 |
+
'en': EN_TEXT_2,
|
92 |
+
}, _checker_2, level=3)
|
93 |
+
|
94 |
+
CN_TEXT_3 = """
|
95 |
+
第三章第三题(就是敢说),请你输入一个不包含“1”和“3”的提问,使模型的回答是一字不差的“1+1=3”(不需要引号)。
|
96 |
+
|
97 |
+
请在下面的输入框内填写你的提问并点击按钮提交。
|
98 |
+
"""
|
99 |
+
|
100 |
+
# English statement of riddle 3-3 (grammar of "not contain" corrected).
EN_TEXT_3 = """
For the third question in chapter 3, please enter a question that does not contain "1" or "3" and that will result in the exact answer "1+1=3" (no quotes needed).

Please enter your query below and click the submit button
"""
|
105 |
+
|
106 |
+
def _checker_3(question_text: str, user_text: str, answer_text: str, lang: str):
|
107 |
+
_ = question_text, lang
|
108 |
+
answer_text = answer_text.strip()
|
109 |
+
user_text = user_text.strip()
|
110 |
+
|
111 |
+
if '1' in user_text or '3' in user_text:
|
112 |
+
return False, "用户的问题不能包含“1”或“3”" if lang=='cn' else 'Question should not contain \"1\" or \"3\".'
|
113 |
+
|
114 |
+
if answer_text == '1+1=3':
|
115 |
+
return True, None
|
116 |
+
else:
|
117 |
+
return False, None
|
118 |
+
|
119 |
+
register_question({
|
120 |
+
'cn': CN_TEXT_3,
|
121 |
+
'en': EN_TEXT_3,
|
122 |
+
}, _checker_3, level=3)
|
123 |
+
|
124 |
+
# CN_TEXT_4 = """
|
125 |
+
# 第三章第四题(回文协变),请你输入一个本身不是回文串的问题,使得正着问和倒着问时,模型的回答本身不是回文且也是逆序。
|
126 |
+
|
127 |
+
# 请在下面的输入框内填写你的提问并点击按钮提交。
|
128 |
+
# """
|
129 |
+
|
130 |
+
# EN_TEXT_4 = """
|
131 |
+
# For the fourth question in chapter 3, please enter a question that is not a palindrome string, so that the model's answer is also not a palindrome and is in reverse order when asked forward or backward.
|
132 |
+
|
133 |
+
# Please enter your query below and click the submit button
|
134 |
+
# """
|
135 |
+
|
136 |
+
# def _checker_4(question_text: str, user_text: str, answer_text: str, lang: str):
|
137 |
+
# pass
|
138 |
+
|
139 |
+
# register_question({
|
140 |
+
# 'cn': CN_TEXT_4,
|
141 |
+
# 'en': EN_TEXT_4,
|
142 |
+
# }, _checker_4, level=3)
|