Jiangxz committed on
Commit
f59d0de
·
verified ·
1 Parent(s): 03a05f9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -16
app.py CHANGED
@@ -17,13 +17,6 @@ import gradio as gr
17
  import re
18
  import time
19
 
20
- def initialize_llm(api_key):
21
- return ChatGroq(
22
- groq_api_key=api_key,
23
- model_name='llama-3.1-70b-versatile'
24
- )
25
- print(f"成功初始化大型語言模型(LLM)")
26
-
27
  def load_documents(sources):
28
  documents = []
29
  for source in sources:
@@ -66,7 +59,7 @@ print(f"\n成功載入 {len(documents)} 個網址或檔案")
66
 
67
  text_splitter = RecursiveCharacterTextSplitter(
68
  chunk_size=512,
69
- chunk_overlap=52,
70
  length_function=len,
71
  is_separator_regex=False,
72
  separators=["\n\n\n","\n\n", "\n", "。"]
@@ -76,7 +69,7 @@ split_docs = text_splitter.split_documents(documents)
76
  print(f"分割後的文件數量:{len(split_docs)}")
77
 
78
  embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
79
- print(f"\n成功初始化微軟嵌入模型")
80
 
81
  print(f"\n開始建立向量資料庫")
82
  vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
@@ -90,7 +83,7 @@ retriever = vectorstore.as_retriever(
90
  "lambda_mult": 0.8
91
  }
92
  )
93
- print(f"成功建立檢索器,搜尋演算法:Maximum Marginal Relevance Retrieval")
94
 
95
  template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
96
  在回答之前,請仔細分析檢索到的上下文,確保你的回答準確完整反映了上下文中的訊息,而不是依賴先前的知識,在回應的答案中絕對不要提到是根據上下文回答。
@@ -106,7 +99,6 @@ template = """Let's work this out in a step by step way to be sure we have the r
106
  PROMPT = PromptTemplate(
107
  template=template, input_variables=["context", "question"]
108
  )
109
- print(f"成功定義 Prompt Template")
110
 
111
  def create_chain(llm):
112
  return RetrievalQA.from_chain_type(
@@ -118,16 +110,22 @@ def create_chain(llm):
118
  )
119
  print(f"成功建立 RAG Chain")
120
 
 
 
 
 
 
 
121
  def generate_insight_questions(query, api_key):
122
  llm = initialize_llm(api_key)
123
- prompt = f"""Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
124
  根據以下回答,生成3個相關的洞察問題:
125
 
126
  原始問題: {query}
127
 
128
  請提供3個簡短但有深度的問題,這些問題應該符合:
129
  1. 與原始問題緊密相關
130
- 2. 重新準確描述原始問題
131
  3. 引導更深入的解決原始問題
132
 
133
  請直接列出這3個問題,每個問題一行,不要添加編號或其他文字。
@@ -147,7 +145,7 @@ def generate_insight_questions(query, api_key):
147
 
148
  def answer_question(query, api_key):
149
  try:
150
- gr.Info("檢索地方稅知識庫中,請稍待片刻......")
151
  llm = initialize_llm(api_key)
152
  chain = create_chain(llm)
153
  result = chain.invoke({"query": query})
@@ -197,7 +195,7 @@ def handle_interaction(query, api_key, state):
197
  while len(insight_questions) < 3:
198
  insight_questions.append("提供更多地方稅資訊")
199
  end_time = time.time()
200
- gr.Info(f"Model 已完成回覆,總執行時間: {(end_time - start_time):.2f} 秒。")
201
  return answer, insight_questions[0], insight_questions[1], insight_questions[2], state, query
202
 
203
  custom_css = """
@@ -280,7 +278,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
280
  > ### **※ RAG-based 系統部署:江信宗,LLM:Llama-3.1-70B,以地方稅極少知識資料示範,僅供參考,準確資訊請依據地方稅稽徵機關回覆為準。**
281
  """, elem_classes="center-text")
282
  with gr.Row():
283
- query_input = gr.Textbox(label="輸入您的問題,系統將基於學習到的知識資料提供相關答案。", placeholder="請輸入您的問題(支援同時輸入多個問題,例如:問題1?問題2?)", scale=3, elem_classes="query-input")
284
  api_key_input = gr.Textbox(label="請輸入您的 API Key", type="password", placeholder="API authentication key", scale=1, elem_classes="api-key-input")
285
  answer_output = gr.Textbox(label="答案:", max_lines=40, elem_classes="answer-box")
286
  with gr.Row():
 
17
  import re
18
  import time
19
 
 
 
 
 
 
 
 
20
  def load_documents(sources):
21
  documents = []
22
  for source in sources:
 
59
 
60
  text_splitter = RecursiveCharacterTextSplitter(
61
  chunk_size=512,
62
+ chunk_overlap=50,
63
  length_function=len,
64
  is_separator_regex=False,
65
  separators=["\n\n\n","\n\n", "\n", "。"]
 
69
  print(f"分割後的文件數量:{len(split_docs)}")
70
 
71
  embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
72
+ print(f"\n成功初始化 Microsoft 嵌入模型")
73
 
74
  print(f"\n開始建立向量資料庫")
75
  vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
 
83
  "lambda_mult": 0.8
84
  }
85
  )
86
+ print(f"檢索演算法:Maximum Marginal Relevance Retrieval")
87
 
88
  template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
89
  在回答之前,請仔細分析檢索到的上下文,確保你的回答準確完整反映了上下文中的訊息,而不是依賴先前的知識,在回應的答案中絕對不要提到是根據上下文回答。
 
99
  PROMPT = PromptTemplate(
100
  template=template, input_variables=["context", "question"]
101
  )
 
102
 
103
  def create_chain(llm):
104
  return RetrievalQA.from_chain_type(
 
110
  )
111
  print(f"成功建立 RAG Chain")
112
 
113
+ def initialize_llm(api_key):
114
+ return ChatGroq(
115
+ groq_api_key=api_key,
116
+ model_name='llama-3.1-70b-versatile'
117
+ )
118
+
119
  def generate_insight_questions(query, api_key):
120
  llm = initialize_llm(api_key)
121
+ prompt = f"""Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in "Traditional Chinese".
122
  根據以下回答,生成3個相關的洞察問題:
123
 
124
  原始問題: {query}
125
 
126
  請提供3個簡短但有深度的問題,這些問題應該符合:
127
  1. 與原始問題緊密相關
128
+ 2. 準確重新描述原始問題
129
  3. 引導更深入的解決原始問題
130
 
131
  請直接列出這3個問題,每個問題一行,不要添加編號或其他文字。
 
145
 
146
  def answer_question(query, api_key):
147
  try:
148
+ gr.Info("檢索地方稅知識庫中......")
149
  llm = initialize_llm(api_key)
150
  chain = create_chain(llm)
151
  result = chain.invoke({"query": query})
 
195
  while len(insight_questions) < 3:
196
  insight_questions.append("提供更多地方稅資訊")
197
  end_time = time.time()
198
+ gr.Info(f"Model 已答覆,執行時間: {(end_time - start_time):.2f} 秒。")
199
  return answer, insight_questions[0], insight_questions[1], insight_questions[2], state, query
200
 
201
  custom_css = """
 
278
  > ### **※ RAG-based 系統部署:江信宗,LLM:Llama-3.1-70B,以地方稅極少知識資料示範,僅供參考,準確資訊請依據地方稅稽徵機關回覆為準。**
279
  """, elem_classes="center-text")
280
  with gr.Row():
281
+ query_input = gr.Textbox(label="輸入您的問題,系統將基於學習到的知識資料提供相關答案。", placeholder="請輸入您的問題(支援同時輸入多個問題,例如:問題1?問題2?)", scale=3, max_lines=5, elem_classes="query-input")
282
  api_key_input = gr.Textbox(label="請輸入您的 API Key", type="password", placeholder="API authentication key", scale=1, elem_classes="api-key-input")
283
  answer_output = gr.Textbox(label="答案:", max_lines=40, elem_classes="answer-box")
284
  with gr.Row():