Jiangxz committed on
Commit
7129c1e
·
verified ·
1 Parent(s): 1abda26

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -11
app.py CHANGED
@@ -4,6 +4,8 @@
4
  import os
5
  from dotenv import load_dotenv
6
  load_dotenv()
 
 
7
  from langchain_community.utils import user_agent
8
  from langchain_groq import ChatGroq
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -13,12 +15,10 @@ from langchain.chains import RetrievalQA
13
  from langchain_community.document_loaders import WebBaseLoader, TextLoader
14
  from langchain.prompts import PromptTemplate
15
  from langchain.schema import Document
16
- import gradio as gr
17
- from openai import OpenAI
18
  import resend
 
19
  import re
20
  import time
21
- import requests
22
 
23
  def load_documents(sources):
24
  documents = []
@@ -58,7 +58,7 @@ sources = [
58
  ]
59
 
60
  documents = load_documents(sources)
61
- print(f"\n成功載入 {len(documents)} 個網址或檔案")
62
 
63
  text_splitter = RecursiveCharacterTextSplitter(
64
  chunk_size=512,
@@ -72,7 +72,7 @@ split_docs = text_splitter.split_documents(documents)
72
  print(f"分割後的文件數量:{len(split_docs)}")
73
 
74
  embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
75
- print(f"\n成功初始化 Microsoft ���入型")
76
 
77
  print(f"\n開始建立向量資料庫")
78
  vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
@@ -207,7 +207,11 @@ def fetch_law_summary(tax_law, keywords):
207
  url = "https://ttc.mof.gov.tw/Api/GetData"
208
  headers = {
209
  "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
210
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
 
 
 
 
211
  }
212
  gr.Info("檢索法令彙編函釋中......")
213
  version_payload = {
@@ -263,7 +267,7 @@ def fetch_law_summary(tax_law, keywords):
263
  except Exception as e:
264
  print(f"檢索關鍵字 '{keyword}' 的法令彙編函釋時發生錯誤:{str(e)}")
265
  if all_results:
266
- summary = f"<h3>相關法令彙編函釋檢索結果({latest_version}):</h3>"
267
  unique_results = {}
268
  for result in all_results:
269
  tax_sn = result.get('TaxSN', '')
@@ -288,7 +292,7 @@ def llm_openai_api(query, answer):
288
  user_prompt = f"""
289
  「題目:{query}
290
  答案:{answer}」
291
- 請詳細分析答案內容後,依據與題目相關性最高的稅目名稱及最多3個重點關鍵字回應我,問題與答案中的稅目名稱列入TaxName,關鍵字列入KeyWord,只須根據格式回應,不要寫其他的。
292
 
293
  # 回應字典格式範例:
294
  {{"TaxName": "地價稅", "KeyWord": "宿舍用地,醫護人員"}}
@@ -323,8 +327,8 @@ def handle_interaction(query, api_key, state):
323
  "html": f"<strong>查詢內容:<br>{query}</strong>",
324
  }
325
  try:
326
- email = resend.Emails.send(params)
327
- print(f"Email sent successfully. ID:{email.id}")
328
  except Exception as e:
329
  print(f"Failed to send email:{str(e)}")
330
  api_key = os.getenv("YOUR_API_KEY")
@@ -448,7 +452,7 @@ custom_css = """
448
  with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as iface:
449
  gr.Markdown("""
450
  # 地方稅知識庫系統 - 財政部財政資訊中心
451
- > ### **※ RAG-based 系統部署:江信宗,以地方稅極少知識資料示範,僅供參考,準確資訊請依地方稅稽徵機關回覆為準,LLM:Llama-3.1-70B。**
452
  """, elem_classes="center-text")
453
  with gr.Row():
454
  query_input = gr.Textbox(label="輸入您的問題,系統將基於學習到的知識資料提供相關答案。", placeholder="請輸入您的問題(支援同時輸入多個問題,例如:問題1?問題2?)", autofocus=True, scale=3, max_lines=5, elem_classes="query-input")
 
4
  import os
5
  from dotenv import load_dotenv
6
  load_dotenv()
7
+ import gradio as gr
8
+ from openai import OpenAI
9
  from langchain_community.utils import user_agent
10
  from langchain_groq import ChatGroq
11
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
15
  from langchain_community.document_loaders import WebBaseLoader, TextLoader
16
  from langchain.prompts import PromptTemplate
17
  from langchain.schema import Document
 
 
18
  import resend
19
+ import requests
20
  import re
21
  import time
 
22
 
23
  def load_documents(sources):
24
  documents = []
 
58
  ]
59
 
60
  documents = load_documents(sources)
61
+ print(f"\n成功載入 {len(documents)} 個檔案")
62
 
63
  text_splitter = RecursiveCharacterTextSplitter(
64
  chunk_size=512,
 
72
  print(f"分割後的文件數量:{len(split_docs)}")
73
 
74
  embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
75
+ print(f"\n成功初始化 Microsoft 嵌入模型")
76
 
77
  print(f"\n開始建立向量資料庫")
78
  vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
 
207
  url = "https://ttc.mof.gov.tw/Api/GetData"
208
  headers = {
209
  "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
210
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
211
+ "accept": "application/json, text/javascript, */*; q=0.01",
212
+ "accept-encoding": "gzip, deflate, br, zstd",
213
+ "accept-language": "zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7",
214
+ "referer": "https://ttc.mof.gov.tw/"
215
  }
216
  gr.Info("檢索法令彙編函釋中......")
217
  version_payload = {
 
267
  except Exception as e:
268
  print(f"檢索關鍵字 '{keyword}' 的法令彙編函釋時發生錯誤:{str(e)}")
269
  if all_results:
270
+ summary = f"<h3>相關法令彙編函釋檢索結果({tax_law} {latest_version}):</h3>"
271
  unique_results = {}
272
  for result in all_results:
273
  tax_sn = result.get('TaxSN', '')
 
292
  user_prompt = f"""
293
  「題目:{query}
294
  答案:{answer}」
295
+ 請詳細分析答案內容後,依據與題目相關性最高的稅目名稱及最多3個重點關鍵字回應我,提供的3個重點關鍵字不能與稅目名稱相同,問題與答案中的稅目名稱列入TaxName,關鍵字列入KeyWord,只須根據格式回應,不要寫其他的。
296
 
297
  # 回應字典格式範例:
298
  {{"TaxName": "地價稅", "KeyWord": "宿舍用地,醫護人員"}}
 
327
  "html": f"<strong>查詢內容:<br>{query}</strong>",
328
  }
329
  try:
330
+ email_response = resend.Emails.send(params)
331
+ print(f"Email sent successfully. Response:{email_response}")
332
  except Exception as e:
333
  print(f"Failed to send email:{str(e)}")
334
  api_key = os.getenv("YOUR_API_KEY")
 
452
  with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as iface:
453
  gr.Markdown("""
454
  # 地方稅知識庫系統 - 財政部財政資訊中心
455
+ > ### **※ RAG-based KM 以地方稅極少知識資料作示範,僅供參考,準確資訊請依地方稅稽徵機關回覆為準。系統部署:江信宗,LLM:Llama-3.1-70B。**
456
  """, elem_classes="center-text")
457
  with gr.Row():
458
  query_input = gr.Textbox(label="輸入您的問題,系統將基於學習到的知識資料提供相關答案。", placeholder="請輸入您的問題(支援同時輸入多個問題,例如:問題1?問題2?)", autofocus=True, scale=3, max_lines=5, elem_classes="query-input")