Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,8 @@
|
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
6 |
load_dotenv()
|
|
|
|
|
7 |
from langchain_community.utils import user_agent
|
8 |
from langchain_groq import ChatGroq
|
9 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
@@ -13,12 +15,10 @@ from langchain.chains import RetrievalQA
|
|
13 |
from langchain_community.document_loaders import WebBaseLoader, TextLoader
|
14 |
from langchain.prompts import PromptTemplate
|
15 |
from langchain.schema import Document
|
16 |
-
import gradio as gr
|
17 |
-
from openai import OpenAI
|
18 |
import resend
|
|
|
19 |
import re
|
20 |
import time
|
21 |
-
import requests
|
22 |
|
23 |
def load_documents(sources):
|
24 |
documents = []
|
@@ -58,7 +58,7 @@ sources = [
|
|
58 |
]
|
59 |
|
60 |
documents = load_documents(sources)
|
61 |
-
print(f"\n成功載入 {len(documents)}
|
62 |
|
63 |
text_splitter = RecursiveCharacterTextSplitter(
|
64 |
chunk_size=512,
|
@@ -72,7 +72,7 @@ split_docs = text_splitter.split_documents(documents)
|
|
72 |
print(f"分割後的文件數量:{len(split_docs)}")
|
73 |
|
74 |
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
|
75 |
-
print(f"\n成功初始化 Microsoft
|
76 |
|
77 |
print(f"\n開始建立向量資料庫")
|
78 |
vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
|
@@ -207,7 +207,11 @@ def fetch_law_summary(tax_law, keywords):
|
|
207 |
url = "https://ttc.mof.gov.tw/Api/GetData"
|
208 |
headers = {
|
209 |
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
210 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
|
|
|
|
|
|
|
|
|
211 |
}
|
212 |
gr.Info("檢索法令彙編函釋中......")
|
213 |
version_payload = {
|
@@ -263,7 +267,7 @@ def fetch_law_summary(tax_law, keywords):
|
|
263 |
except Exception as e:
|
264 |
print(f"檢索關鍵字 '{keyword}' 的法令彙編函釋時發生錯誤:{str(e)}")
|
265 |
if all_results:
|
266 |
-
summary = f"<h3>相關法令彙編函釋檢索結果({latest_version}):</h3>"
|
267 |
unique_results = {}
|
268 |
for result in all_results:
|
269 |
tax_sn = result.get('TaxSN', '')
|
@@ -288,7 +292,7 @@ def llm_openai_api(query, answer):
|
|
288 |
user_prompt = f"""
|
289 |
「題目:{query}
|
290 |
答案:{answer}」
|
291 |
-
請詳細分析答案內容後,依據與題目相關性最高的稅目名稱及最多3
|
292 |
|
293 |
# 回應字典格式範例:
|
294 |
{{"TaxName": "地價稅", "KeyWord": "宿舍用地,醫護人員"}}
|
@@ -323,8 +327,8 @@ def handle_interaction(query, api_key, state):
|
|
323 |
"html": f"<strong>查詢內容:<br>{query}</strong>",
|
324 |
}
|
325 |
try:
|
326 |
-
|
327 |
-
print(f"Email sent successfully.
|
328 |
except Exception as e:
|
329 |
print(f"Failed to send email:{str(e)}")
|
330 |
api_key = os.getenv("YOUR_API_KEY")
|
@@ -448,7 +452,7 @@ custom_css = """
|
|
448 |
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as iface:
|
449 |
gr.Markdown("""
|
450 |
# 地方稅知識庫系統 - 財政部財政資訊中心
|
451 |
-
> ### **※ RAG-based
|
452 |
""", elem_classes="center-text")
|
453 |
with gr.Row():
|
454 |
query_input = gr.Textbox(label="輸入您的問題,系統將基於學習到的知識資料提供相關答案。", placeholder="請輸入您的問題(支援同時輸入多個問題,例如:問題1?問題2?)", autofocus=True, scale=3, max_lines=5, elem_classes="query-input")
|
|
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
6 |
load_dotenv()
|
7 |
+
import gradio as gr
|
8 |
+
from openai import OpenAI
|
9 |
from langchain_community.utils import user_agent
|
10 |
from langchain_groq import ChatGroq
|
11 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
15 |
from langchain_community.document_loaders import WebBaseLoader, TextLoader
|
16 |
from langchain.prompts import PromptTemplate
|
17 |
from langchain.schema import Document
|
|
|
|
|
18 |
import resend
|
19 |
+
import requests
|
20 |
import re
|
21 |
import time
|
|
|
22 |
|
23 |
def load_documents(sources):
|
24 |
documents = []
|
|
|
58 |
]
|
59 |
|
60 |
documents = load_documents(sources)
|
61 |
+
print(f"\n成功載入 {len(documents)} 個檔案")
|
62 |
|
63 |
text_splitter = RecursiveCharacterTextSplitter(
|
64 |
chunk_size=512,
|
|
|
72 |
print(f"分割後的文件數量:{len(split_docs)}")
|
73 |
|
74 |
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
|
75 |
+
print(f"\n成功初始化 Microsoft 嵌入模型")
|
76 |
|
77 |
print(f"\n開始建立向量資料庫")
|
78 |
vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
|
|
|
207 |
url = "https://ttc.mof.gov.tw/Api/GetData"
|
208 |
headers = {
|
209 |
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
210 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
|
211 |
+
"accept": "application/json, text/javascript, */*; q=0.01",
|
212 |
+
"accept-encoding": "gzip, deflate, br, zstd",
|
213 |
+
"accept-language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
|
214 |
+
"referer": "https://ttc.mof.gov.tw/"
|
215 |
}
|
216 |
gr.Info("檢索法令彙編函釋中......")
|
217 |
version_payload = {
|
|
|
267 |
except Exception as e:
|
268 |
print(f"檢索關鍵字 '{keyword}' 的法令彙編函釋時發生錯誤:{str(e)}")
|
269 |
if all_results:
|
270 |
+
summary = f"<h3>相關法令彙編函釋檢索結果({tax_law} {latest_version}):</h3>"
|
271 |
unique_results = {}
|
272 |
for result in all_results:
|
273 |
tax_sn = result.get('TaxSN', '')
|
|
|
292 |
user_prompt = f"""
|
293 |
「題目:{query}
|
294 |
答案:{answer}」
|
295 |
+
請詳細分析答案內容後,依據與題目相關性最高的稅目名稱及最多3個重點關鍵字回應我,提供的3個重點關鍵字不能與稅目名稱相同,問題與答案中的稅目名稱列入TaxName,關鍵字列入KeyWord,只須根據格式回應,不要寫其他的。
|
296 |
|
297 |
# 回應字典格式範例:
|
298 |
{{"TaxName": "地價稅", "KeyWord": "宿舍用地,醫護人員"}}
|
|
|
327 |
"html": f"<strong>查詢內容:<br>{query}</strong>",
|
328 |
}
|
329 |
try:
|
330 |
+
email_response = resend.Emails.send(params)
|
331 |
+
print(f"Email sent successfully. Response:{email_response}")
|
332 |
except Exception as e:
|
333 |
print(f"Failed to send email:{str(e)}")
|
334 |
api_key = os.getenv("YOUR_API_KEY")
|
|
|
452 |
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as iface:
|
453 |
gr.Markdown("""
|
454 |
# 地方稅知識庫系統 - 財政部財政資訊中心
|
455 |
+
> ### **※ RAG-based KM 以地方稅極少知識資料作示範,僅供參考,準確資訊請依地方稅稽徵機關回覆為準。系統部署:江信宗,LLM:Llama-3.1-70B。**
|
456 |
""", elem_classes="center-text")
|
457 |
with gr.Row():
|
458 |
query_input = gr.Textbox(label="輸入您的問題,系統將基於學習到的知識資料提供相關答案。", placeholder="請輸入您的問題(支援同時輸入多個問題,例如:問題1?問題2?)", autofocus=True, scale=3, max_lines=5, elem_classes="query-input")
|