Commit
·
d919631
1
Parent(s):
4d2f593
Change default error message to English (#3838)
Browse files### What problem does this PR solve?
As title
### Type of change
- [x] Refactoring
---------
Signed-off-by: Jin Hai <[email protected]>
- rag/llm/chat_model.py +55 -27
- rag/nlp/__init__.py +8 -0
rag/llm/chat_model.py
CHANGED
|
@@ -22,7 +22,7 @@ from abc import ABC
|
|
| 22 |
from openai import OpenAI
|
| 23 |
import openai
|
| 24 |
from ollama import Client
|
| 25 |
-
from rag.nlp import
|
| 26 |
from rag.utils import num_tokens_from_string
|
| 27 |
from groq import Groq
|
| 28 |
import os
|
|
@@ -30,6 +30,8 @@ import json
|
|
| 30 |
import requests
|
| 31 |
import asyncio
|
| 32 |
|
|
|
|
|
|
|
| 33 |
|
| 34 |
class Base(ABC):
|
| 35 |
def __init__(self, key, model_name, base_url):
|
|
@@ -47,8 +49,10 @@ class Base(ABC):
|
|
| 47 |
**gen_conf)
|
| 48 |
ans = response.choices[0].message.content.strip()
|
| 49 |
if response.choices[0].finish_reason == "length":
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
| 52 |
return ans, response.usage.total_tokens
|
| 53 |
except openai.APIError as e:
|
| 54 |
return "**ERROR**: " + str(e), 0
|
|
@@ -80,8 +84,10 @@ class Base(ABC):
|
|
| 80 |
else: total_tokens = resp.usage.total_tokens
|
| 81 |
|
| 82 |
if resp.choices[0].finish_reason == "length":
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
| 85 |
yield ans
|
| 86 |
|
| 87 |
except openai.APIError as e:
|
|
@@ -167,8 +173,10 @@ class BaiChuanChat(Base):
|
|
| 167 |
**self._format_params(gen_conf))
|
| 168 |
ans = response.choices[0].message.content.strip()
|
| 169 |
if response.choices[0].finish_reason == "length":
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
| 172 |
return ans, response.usage.total_tokens
|
| 173 |
except openai.APIError as e:
|
| 174 |
return "**ERROR**: " + str(e), 0
|
|
@@ -207,8 +215,10 @@ class BaiChuanChat(Base):
|
|
| 207 |
else resp.usage["total_tokens"]
|
| 208 |
)
|
| 209 |
if resp.choices[0].finish_reason == "length":
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
| 212 |
yield ans
|
| 213 |
|
| 214 |
except Exception as e:
|
|
@@ -242,8 +252,10 @@ class QWenChat(Base):
|
|
| 242 |
ans += response.output.choices[0]['message']['content']
|
| 243 |
tk_count += response.usage.total_tokens
|
| 244 |
if response.output.choices[0].get("finish_reason", "") == "length":
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
| 247 |
return ans, tk_count
|
| 248 |
|
| 249 |
return "**ERROR**: " + response.message, tk_count
|
|
@@ -276,8 +288,10 @@ class QWenChat(Base):
|
|
| 276 |
ans = resp.output.choices[0]['message']['content']
|
| 277 |
tk_count = resp.usage.total_tokens
|
| 278 |
if resp.output.choices[0].get("finish_reason", "") == "length":
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
| 281 |
yield ans
|
| 282 |
else:
|
| 283 |
yield ans + "\n**ERROR**: " + resp.message if not re.search(r" (key|quota)", str(resp.message).lower()) else "Out of credit. Please set the API key in **settings > Model providers.**"
|
|
@@ -308,8 +322,10 @@ class ZhipuChat(Base):
|
|
| 308 |
)
|
| 309 |
ans = response.choices[0].message.content.strip()
|
| 310 |
if response.choices[0].finish_reason == "length":
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
| 313 |
return ans, response.usage.total_tokens
|
| 314 |
except Exception as e:
|
| 315 |
return "**ERROR**: " + str(e), 0
|
|
@@ -333,8 +349,10 @@ class ZhipuChat(Base):
|
|
| 333 |
delta = resp.choices[0].delta.content
|
| 334 |
ans += delta
|
| 335 |
if resp.choices[0].finish_reason == "length":
|
| 336 |
-
|
| 337 |
-
|
|
|
|
|
|
|
| 338 |
tk_count = resp.usage.total_tokens
|
| 339 |
if resp.choices[0].finish_reason == "stop": tk_count = resp.usage.total_tokens
|
| 340 |
yield ans
|
|
@@ -525,8 +543,10 @@ class MiniMaxChat(Base):
|
|
| 525 |
response = response.json()
|
| 526 |
ans = response["choices"][0]["message"]["content"].strip()
|
| 527 |
if response["choices"][0]["finish_reason"] == "length":
|
| 528 |
-
|
| 529 |
-
|
|
|
|
|
|
|
| 530 |
return ans, response["usage"]["total_tokens"]
|
| 531 |
except Exception as e:
|
| 532 |
return "**ERROR**: " + str(e), 0
|
|
@@ -594,8 +614,10 @@ class MistralChat(Base):
|
|
| 594 |
**gen_conf)
|
| 595 |
ans = response.choices[0].message.content
|
| 596 |
if response.choices[0].finish_reason == "length":
|
| 597 |
-
|
| 598 |
-
|
|
|
|
|
|
|
| 599 |
return ans, response.usage.total_tokens
|
| 600 |
except openai.APIError as e:
|
| 601 |
return "**ERROR**: " + str(e), 0
|
|
@@ -618,8 +640,10 @@ class MistralChat(Base):
|
|
| 618 |
ans += resp.choices[0].delta.content
|
| 619 |
total_tokens += 1
|
| 620 |
if resp.choices[0].finish_reason == "length":
|
| 621 |
-
|
| 622 |
-
|
|
|
|
|
|
|
| 623 |
yield ans
|
| 624 |
|
| 625 |
except openai.APIError as e:
|
|
@@ -811,8 +835,10 @@ class GroqChat:
|
|
| 811 |
)
|
| 812 |
ans = response.choices[0].message.content
|
| 813 |
if response.choices[0].finish_reason == "length":
|
| 814 |
-
|
| 815 |
-
|
|
|
|
|
|
|
| 816 |
return ans, response.usage.total_tokens
|
| 817 |
except Exception as e:
|
| 818 |
return ans + "\n**ERROR**: " + str(e), 0
|
|
@@ -838,8 +864,10 @@ class GroqChat:
|
|
| 838 |
ans += resp.choices[0].delta.content
|
| 839 |
total_tokens += 1
|
| 840 |
if resp.choices[0].finish_reason == "length":
|
| 841 |
-
|
| 842 |
-
|
|
|
|
|
|
|
| 843 |
yield ans
|
| 844 |
|
| 845 |
except Exception as e:
|
|
|
|
| 22 |
from openai import OpenAI
|
| 23 |
import openai
|
| 24 |
from ollama import Client
|
| 25 |
+
from rag.nlp import is_chinese
|
| 26 |
from rag.utils import num_tokens_from_string
|
| 27 |
from groq import Groq
|
| 28 |
import os
|
|
|
|
| 30 |
import requests
|
| 31 |
import asyncio
|
| 32 |
|
| 33 |
+
LENGTH_NOTIFICATION_CN = "······\n由于长度的原因,回答被截断了,请继续吗?"
|
| 34 |
+
LENGTH_NOTIFICATION_EN = "...\nFor the content length reason, it stopped, continue?"
|
| 35 |
|
| 36 |
class Base(ABC):
|
| 37 |
def __init__(self, key, model_name, base_url):
|
|
|
|
| 49 |
**gen_conf)
|
| 50 |
ans = response.choices[0].message.content.strip()
|
| 51 |
if response.choices[0].finish_reason == "length":
|
| 52 |
+
if is_chinese(ans):
|
| 53 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 54 |
+
else:
|
| 55 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 56 |
return ans, response.usage.total_tokens
|
| 57 |
except openai.APIError as e:
|
| 58 |
return "**ERROR**: " + str(e), 0
|
|
|
|
| 84 |
else: total_tokens = resp.usage.total_tokens
|
| 85 |
|
| 86 |
if resp.choices[0].finish_reason == "length":
|
| 87 |
+
if is_chinese(ans):
|
| 88 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 89 |
+
else:
|
| 90 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 91 |
yield ans
|
| 92 |
|
| 93 |
except openai.APIError as e:
|
|
|
|
| 173 |
**self._format_params(gen_conf))
|
| 174 |
ans = response.choices[0].message.content.strip()
|
| 175 |
if response.choices[0].finish_reason == "length":
|
| 176 |
+
if is_chinese([ans]):
|
| 177 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 178 |
+
else:
|
| 179 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 180 |
return ans, response.usage.total_tokens
|
| 181 |
except openai.APIError as e:
|
| 182 |
return "**ERROR**: " + str(e), 0
|
|
|
|
| 215 |
else resp.usage["total_tokens"]
|
| 216 |
)
|
| 217 |
if resp.choices[0].finish_reason == "length":
|
| 218 |
+
if is_chinese([ans]):
|
| 219 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 220 |
+
else:
|
| 221 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 222 |
yield ans
|
| 223 |
|
| 224 |
except Exception as e:
|
|
|
|
| 252 |
ans += response.output.choices[0]['message']['content']
|
| 253 |
tk_count += response.usage.total_tokens
|
| 254 |
if response.output.choices[0].get("finish_reason", "") == "length":
|
| 255 |
+
if is_chinese([ans]):
|
| 256 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 257 |
+
else:
|
| 258 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 259 |
return ans, tk_count
|
| 260 |
|
| 261 |
return "**ERROR**: " + response.message, tk_count
|
|
|
|
| 288 |
ans = resp.output.choices[0]['message']['content']
|
| 289 |
tk_count = resp.usage.total_tokens
|
| 290 |
if resp.output.choices[0].get("finish_reason", "") == "length":
|
| 291 |
+
if is_chinese(ans):
|
| 292 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 293 |
+
else:
|
| 294 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 295 |
yield ans
|
| 296 |
else:
|
| 297 |
yield ans + "\n**ERROR**: " + resp.message if not re.search(r" (key|quota)", str(resp.message).lower()) else "Out of credit. Please set the API key in **settings > Model providers.**"
|
|
|
|
| 322 |
)
|
| 323 |
ans = response.choices[0].message.content.strip()
|
| 324 |
if response.choices[0].finish_reason == "length":
|
| 325 |
+
if is_chinese(ans):
|
| 326 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 327 |
+
else:
|
| 328 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 329 |
return ans, response.usage.total_tokens
|
| 330 |
except Exception as e:
|
| 331 |
return "**ERROR**: " + str(e), 0
|
|
|
|
| 349 |
delta = resp.choices[0].delta.content
|
| 350 |
ans += delta
|
| 351 |
if resp.choices[0].finish_reason == "length":
|
| 352 |
+
if is_chinese(ans):
|
| 353 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 354 |
+
else:
|
| 355 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 356 |
tk_count = resp.usage.total_tokens
|
| 357 |
if resp.choices[0].finish_reason == "stop": tk_count = resp.usage.total_tokens
|
| 358 |
yield ans
|
|
|
|
| 543 |
response = response.json()
|
| 544 |
ans = response["choices"][0]["message"]["content"].strip()
|
| 545 |
if response["choices"][0]["finish_reason"] == "length":
|
| 546 |
+
if is_chinese(ans):
|
| 547 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 548 |
+
else:
|
| 549 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 550 |
return ans, response["usage"]["total_tokens"]
|
| 551 |
except Exception as e:
|
| 552 |
return "**ERROR**: " + str(e), 0
|
|
|
|
| 614 |
**gen_conf)
|
| 615 |
ans = response.choices[0].message.content
|
| 616 |
if response.choices[0].finish_reason == "length":
|
| 617 |
+
if is_chinese(ans):
|
| 618 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 619 |
+
else:
|
| 620 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 621 |
return ans, response.usage.total_tokens
|
| 622 |
except openai.APIError as e:
|
| 623 |
return "**ERROR**: " + str(e), 0
|
|
|
|
| 640 |
ans += resp.choices[0].delta.content
|
| 641 |
total_tokens += 1
|
| 642 |
if resp.choices[0].finish_reason == "length":
|
| 643 |
+
if is_chinese(ans):
|
| 644 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 645 |
+
else:
|
| 646 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 647 |
yield ans
|
| 648 |
|
| 649 |
except openai.APIError as e:
|
|
|
|
| 835 |
)
|
| 836 |
ans = response.choices[0].message.content
|
| 837 |
if response.choices[0].finish_reason == "length":
|
| 838 |
+
if is_chinese(ans):
|
| 839 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 840 |
+
else:
|
| 841 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 842 |
return ans, response.usage.total_tokens
|
| 843 |
except Exception as e:
|
| 844 |
return ans + "\n**ERROR**: " + str(e), 0
|
|
|
|
| 864 |
ans += resp.choices[0].delta.content
|
| 865 |
total_tokens += 1
|
| 866 |
if resp.choices[0].finish_reason == "length":
|
| 867 |
+
if is_chinese(ans):
|
| 868 |
+
ans += LENGTH_NOTIFICATION_CN
|
| 869 |
+
else:
|
| 870 |
+
ans += LENGTH_NOTIFICATION_EN
|
| 871 |
yield ans
|
| 872 |
|
| 873 |
except Exception as e:
|
rag/nlp/__init__.py
CHANGED
|
@@ -230,6 +230,14 @@ def is_english(texts):
|
|
| 230 |
return True
|
| 231 |
return False
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
def tokenize(d, t, eng):
|
| 235 |
d["content_with_weight"] = t
|
|
|
|
| 230 |
return True
|
| 231 |
return False
|
| 232 |
|
| 233 |
+
def is_chinese(text):
    """Return True if *text* is predominantly Chinese.

    A character counts as Chinese when it falls in the CJK Unified
    Ideographs block (U+4E00..U+9FFF).

    Args:
        text: string (or iterable of single characters) to classify.

    Returns:
        bool: True when the ratio of CJK characters to total length
        strictly exceeds 0.2; False otherwise. An empty input returns
        False instead of raising ZeroDivisionError (bug fix — the
        original divided by ``len(text)`` unconditionally).
    """
    total = len(text)
    if total == 0:
        # Guard: empty answer strings reach this helper (e.g. a truncated
        # LLM reply); treat them as non-Chinese rather than crashing.
        return False
    chinese = 0
    for ch in text:
        if '\u4e00' <= ch <= '\u9fff':
            chinese += 1
            # Ratio only changes when the counter does, so the early-exit
            # check is needed here only — same threshold as the original.
            if chinese / total > 0.2:
                return True
    return False
|
| 241 |
|
| 242 |
def tokenize(d, t, eng):
|
| 243 |
d["content_with_weight"] = t
|