Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,6 @@
|
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
6 |
load_dotenv()
|
7 |
-
os.environ["LANGCHAIN_COMMUNITY__USER_AGENT"] = "Taiwan_Tax_Knowledge-base"
|
8 |
from langchain_community.utils import user_agent
|
9 |
from langchain_groq import ChatGroq
|
10 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
@@ -18,7 +17,6 @@ import gradio as gr
|
|
18 |
import re
|
19 |
|
20 |
def initialize_llm(api_key):
|
21 |
-
os.environ["GROQ_API_KEY"] = api_key
|
22 |
return ChatGroq(
|
23 |
groq_api_key=api_key,
|
24 |
model_name='llama-3.1-70b-versatile'
|
@@ -67,8 +65,9 @@ print(f"\n成功載入 {len(documents)} 個網址或檔案")
|
|
67 |
|
68 |
text_splitter = RecursiveCharacterTextSplitter(
|
69 |
chunk_size=512,
|
70 |
-
chunk_overlap=
|
71 |
length_function=len,
|
|
|
72 |
separators=["\n\n\n","\n\n", "\n", "。"]
|
73 |
)
|
74 |
|
@@ -76,7 +75,7 @@ split_docs = text_splitter.split_documents(documents)
|
|
76 |
print(f"分割後的文件數量:{len(split_docs)}")
|
77 |
|
78 |
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
|
79 |
-
print(f"\n
|
80 |
|
81 |
print(f"\n開始建立向量資料庫")
|
82 |
vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
|
@@ -93,8 +92,8 @@ retriever = vectorstore.as_retriever(
|
|
93 |
print(f"成功建立檢索器,搜尋演算法:Maximum Marginal Relevance Retrieval")
|
94 |
|
95 |
template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
|
96 |
-
|
97 |
-
|
98 |
如果檢索到的上下文不包含足夠回答問題的訊息,請誠實的說明,不要試圖編造答案。
|
99 |
|
100 |
上下文: {context}
|
@@ -127,8 +126,8 @@ def generate_insight_questions(query, api_key):
|
|
127 |
|
128 |
請提供3個簡短但有深度的問題,這些問題應該符合:
|
129 |
1. 與原始問題緊密相關
|
130 |
-
2.
|
131 |
-
3.
|
132 |
|
133 |
請直接列出這3個問題,每個問題一行,不要添加編號或其他文字。
|
134 |
"""
|
@@ -149,7 +148,7 @@ def answer_question(query, api_key):
|
|
149 |
try:
|
150 |
llm = initialize_llm(api_key)
|
151 |
chain = create_chain(llm)
|
152 |
-
result = chain({"query": query})
|
153 |
answer = result["result"]
|
154 |
insight_questions = generate_insight_questions(query, api_key)
|
155 |
while len(insight_questions) < 3:
|
@@ -186,6 +185,8 @@ def convert_punctuation(text):
|
|
186 |
def handle_interaction(query, api_key, state):
|
187 |
if state is None:
|
188 |
state = {"history": []}
|
|
|
|
|
189 |
query = convert_punctuation(query)
|
190 |
answer, insight_questions = answer_multiple_questions(query, api_key)
|
191 |
state["history"].append((query, answer))
|
@@ -286,4 +287,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
|
|
286 |
)
|
287 |
|
288 |
if __name__ == "__main__":
|
289 |
-
|
|
|
|
|
|
|
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
6 |
load_dotenv()
|
|
|
7 |
from langchain_community.utils import user_agent
|
8 |
from langchain_groq import ChatGroq
|
9 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
17 |
import re
|
18 |
|
19 |
def initialize_llm(api_key):
|
|
|
20 |
return ChatGroq(
|
21 |
groq_api_key=api_key,
|
22 |
model_name='llama-3.1-70b-versatile'
|
|
|
65 |
|
66 |
text_splitter = RecursiveCharacterTextSplitter(
|
67 |
chunk_size=512,
|
68 |
+
chunk_overlap=64,
|
69 |
length_function=len,
|
70 |
+
is_separator_regex=False,
|
71 |
separators=["\n\n\n","\n\n", "\n", "。"]
|
72 |
)
|
73 |
|
|
|
75 |
print(f"分割後的文件數量:{len(split_docs)}")
|
76 |
|
77 |
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
|
78 |
+
print(f"\n成功初始化微軟嵌入模型")
|
79 |
|
80 |
print(f"\n開始建立向量資料庫")
|
81 |
vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
|
|
|
92 |
print(f"成功建立檢索器,搜尋演算法:Maximum Marginal Relevance Retrieval")
|
93 |
|
94 |
template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
|
95 |
+
在回答之前,請仔細分析檢索到的上下文,確保你的回答準確完整反映了上下文中的訊息,而不是依賴先前的知識,在回應的答案中絕對不要提到是根據上下文回答。
|
96 |
+
如果檢索到的多個上下文之間存在聯繫,請整合這些訊息以提供更全面的回答,但要避免過度推斷。
|
97 |
如果檢索到的上下文不包含足夠回答問題的訊息,請誠實的說明,不要試圖編造答案。
|
98 |
|
99 |
上下文: {context}
|
|
|
126 |
|
127 |
請提供3個簡短但有深度的問題,這些問題應該符合:
|
128 |
1. 與原始問題緊密相關
|
129 |
+
2. 重新準確描述原始問題
|
130 |
+
3. 引導更深入的解決原始問題
|
131 |
|
132 |
請直接列出這3個問題,每個問題一行,不要添加編號或其他文字。
|
133 |
"""
|
|
|
148 |
try:
|
149 |
llm = initialize_llm(api_key)
|
150 |
chain = create_chain(llm)
|
151 |
+
result = chain.invoke({"query": query})
|
152 |
answer = result["result"]
|
153 |
insight_questions = generate_insight_questions(query, api_key)
|
154 |
while len(insight_questions) < 3:
|
|
|
185 |
def handle_interaction(query, api_key, state):
|
186 |
if state is None:
|
187 |
state = {"history": []}
|
188 |
+
if not api_key:
|
189 |
+
api_key = os.getenv("Llama70B_Key")
|
190 |
query = convert_punctuation(query)
|
191 |
answer, insight_questions = answer_multiple_questions(query, api_key)
|
192 |
state["history"].append((query, answer))
|
|
|
287 |
)
|
288 |
|
289 |
if __name__ == "__main__":
|
290 |
+
if "SPACE_ID" in os.environ:
|
291 |
+
iface.launch()
|
292 |
+
else:
|
293 |
+
iface.launch(share=True)
|