dayuian committed on
Commit
4f885c6
·
verified ·
1 Parent(s): 13a1b41

Update vocab.py

Browse files
Files changed (1) hide show
  1. vocab.py +19 -3
vocab.py CHANGED
@@ -1,11 +1,12 @@
1
  import sqlite3
2
  import os
 
3
 
4
  DATA_DIR = "./data"
5
  DB_PATH = os.path.join(DATA_DIR, "sentences.db")
6
 
7
 
8
- # 初始化資料表(保險起見,但你本地應該已建過了)
9
  def init_db():
10
  conn = sqlite3.connect(DB_PATH)
11
  c = conn.cursor()
@@ -24,7 +25,7 @@ def init_db():
24
  conn.close()
25
 
26
 
27
- # 查詢某個單字的所有例句,包含來源 & 模型
28
  def get_sentences_by_word(word):
29
  conn = sqlite3.connect(DB_PATH)
30
  c = conn.cursor()
@@ -34,7 +35,7 @@ def get_sentences_by_word(word):
34
  return results
35
 
36
 
37
- # 儲存句子(GPT 生成 or 句庫)
38
  def save_sentence(word, phonetic, sentence, source, model):
39
  conn = sqlite3.connect(DB_PATH)
40
  c = conn.cursor()
@@ -45,3 +46,18 @@ def save_sentence(word, phonetic, sentence, source, model):
45
  ''', (word, phonetic, sentence, source, model))
46
  conn.commit()
47
  conn.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import sqlite3
2
  import os
3
+ import json
4
 
5
  DATA_DIR = "./data"
6
  DB_PATH = os.path.join(DATA_DIR, "sentences.db")
7
 
8
 
9
+ # 初始化資料表
10
  def init_db():
11
  conn = sqlite3.connect(DB_PATH)
12
  c = conn.cursor()
 
25
  conn.close()
26
 
27
 
28
+ # 查某個單字的所有例句
29
  def get_sentences_by_word(word):
30
  conn = sqlite3.connect(DB_PATH)
31
  c = conn.cursor()
 
35
  return results
36
 
37
 
38
+ # 儲存句子到 SQLite
39
  def save_sentence(word, phonetic, sentence, source, model):
40
  conn = sqlite3.connect(DB_PATH)
41
  c = conn.cursor()
 
46
  ''', (word, phonetic, sentence, source, model))
47
  conn.commit()
48
  conn.close()
49
+
50
+
51
+ # 掃描 /data 內的 JSON 檔,回傳單字庫名稱清單
52
def get_sources():
    """Return the names of the word banks available in ``DATA_DIR``.

    A word bank is any entry in ``DATA_DIR`` whose name ends in ``.json``;
    its name is the file name with that trailing suffix removed.

    Returns:
        list[str]: Word-bank names, sorted so the result is deterministic
        (``os.listdir`` returns entries in arbitrary order).

    Raises:
        OSError: If ``DATA_DIR`` cannot be listed (e.g. it does not exist).
    """
    suffix = ".json"
    # Strip only the trailing suffix. Splitting on ".json" would truncate a
    # name such as "a.json.bak.json" to "a", which no longer maps back to the
    # file that has to be opened as "<name>.json".
    return sorted(
        name[: -len(suffix)]
        for name in os.listdir(DATA_DIR)
        if name.endswith(suffix)
    )
56
+
57
+
58
+ # 根據單字庫名稱,回傳該單字庫所有單字
59
def get_words_from_source(source):
    """Load one word bank and return every word it contains.

    Args:
        source: Word-bank name; the file read is ``<DATA_DIR>/<source>.json``.

    Returns:
        list: The ``'word'`` value of each entry in the JSON file, in the
        order the entries appear.
    """
    json_file = os.path.join(DATA_DIR, f"{source}.json")
    with open(json_file, 'r', encoding='utf-8') as handle:
        entries = json.load(handle)

    collected = []
    for entry in entries:
        collected.append(entry['word'])
    return collected