greyfoss committed on
Commit
813287b
·
verified ·
1 Parent(s): a6ae6ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -15
app.py CHANGED
@@ -16,12 +16,6 @@ bot_token = "<Assistant>"
16
 
17
  max_context_length = 750
18
 
19
- def is_english_word(tested_string):
20
- pattern = re.compile(r"^[a-zA-Z]+$")
21
- return pattern.match(tested_string) is not None
22
-
23
-
24
-
25
  def format(history):
26
  prompt = bos_token
27
 
@@ -34,6 +28,10 @@ def format(history):
34
  print(prompt)
35
  return prompt
36
 
 
 
 
 
37
  def gradio(model, tokenizer):
38
  def response(
39
  user_input,
@@ -72,13 +70,9 @@ def gradio(model, tokenizer):
72
  )
73
  output = beam_output[0][prompt_length:]
74
 
75
- tokens = tokenizer.convert_ids_to_tokens(output)
76
- for i, token in enumerate(tokens[:-1]):
77
- if is_english_word(token) and is_english_word(tokens[i + 1]):
78
- tokens[i] = token + " "
79
- text = "".join(tokens).replace("##", "").replace("[UNK]", "").strip()
80
-
81
- return text
82
 
83
  bot = gr.Chatbot(show_copy_button=True, show_share_button=True)
84
 
@@ -135,14 +129,13 @@ def gradio(model, tokenizer):
135
  repetition_penalty,
136
  no_repeat_ngram_size,
137
  ],
138
- stop_btn = "🛑 Stop",
139
  retry_btn = "🔄 Regenerate",
140
  undo_btn = "↩️ Remove last turn",
141
  clear_btn = "➕ New conversation",
142
  examples=[
143
  ["帮我生成一个句子,描述春天的美好。", 30, 0.9, 0.95, 1.2, 5],
144
  ["给我讲一个笑话。", 50, 0.8, 0.9, 1.3, 6],
145
- ["Give me some chinese names.", 100, 0.9, 1.0, 1, 5]
146
  ]
147
  )
148
 
 
16
 
17
  max_context_length = 750
18
 
 
 
 
 
 
 
19
  def format(history):
20
  prompt = bos_token
21
 
 
28
  print(prompt)
29
  return prompt
30
 
31
+ def remove_spaces_between_chinese(text):
32
+ rex = r"(?<![a-zA-Z]{2})(?<=[a-zA-Z]{1})[ ]+(?=[a-zA-Z] |.$)|(?<=\p{Han}) +"
33
+ return re.sub(rex, "", text, 0, re.MULTILINE | re.UNICODE)
34
+
35
  def gradio(model, tokenizer):
36
  def response(
37
  user_input,
 
70
  )
71
  output = beam_output[0][prompt_length:]
72
 
73
+ generated = remove_spaces_between_chinese(tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True))
74
+
75
+ return generated
 
 
 
 
76
 
77
  bot = gr.Chatbot(show_copy_button=True, show_share_button=True)
78
 
 
129
  repetition_penalty,
130
  no_repeat_ngram_size,
131
  ],
 
132
  retry_btn = "🔄 Regenerate",
133
  undo_btn = "↩️ Remove last turn",
134
  clear_btn = "➕ New conversation",
135
  examples=[
136
  ["帮我生成一个句子,描述春天的美好。", 30, 0.9, 0.95, 1.2, 5],
137
  ["给我讲一个笑话。", 50, 0.8, 0.9, 1.3, 6],
138
+ ["Give me five english names.", 100, 0.9, 1.0, 1, 5]
139
  ]
140
  )
141