dayuian committed on
Commit
4982f66
·
verified ·
1 Parent(s): c2aeaf4

Update vocab.py

Browse files
Files changed (1) hide show
  1. vocab.py +59 -55
vocab.py CHANGED
@@ -1,63 +1,67 @@
1
  import sqlite3
2
  import os
3
  import json
 
 
 
4
 
5
  # Directory scanned for the vocabulary "<source>.json" files; also holds the SQLite file.
  DATA_DIR = "./data"
6
  # Path of the SQLite database that stores the example sentences.
  DB_PATH = os.path.join(DATA_DIR, "sentences.db")
7
 
8
 
9
def init_db():
    """Create the ``sentences`` table in the SQLite file at ``DB_PATH`` if missing.

    The table stores one row per ``(word, source, model)`` combination, which
    is also its primary key. Calling this function repeatedly is safe because
    the statement uses ``CREATE TABLE IF NOT EXISTS``.

    Raises:
        sqlite3.Error: if the database cannot be opened or the DDL fails.
        OSError: if the parent directory of ``DB_PATH`` cannot be created.
    """
    # sqlite3.connect() creates the file but not its parent directory, so make
    # sure the directory exists before the very first run.
    db_dir = os.path.dirname(DB_PATH)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS sentences (
                word TEXT,
                phonetic TEXT,
                sentence TEXT,
                source TEXT,
                model TEXT,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (word, source, model)
            )
        ''')
        conn.commit()
    finally:
        # Close even when the statement raises, so the handle is never leaked.
        conn.close()
26
-
27
-
28
def get_sentences_by_word(word):
    """Return every stored example sentence for ``word``.

    Args:
        word: The vocabulary word to look up (matched exactly).

    Returns:
        A list of ``(word, phonetic, sentence, source, model)`` tuples, empty
        when nothing is stored for the word.

    Raises:
        sqlite3.Error: if the database cannot be queried (for example when the
            ``sentences`` table has not been created yet).
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.execute(
            'SELECT word, phonetic, sentence, source, model FROM sentences WHERE word=?',
            (word,),
        )
        return cursor.fetchall()
    finally:
        # Close even when the query raises, so the handle is never leaked.
        conn.close()
36
-
37
-
38
def save_sentence(word, phonetic, sentence, source, model):
    """Insert or update one example sentence in the SQLite store.

    Rows are keyed by ``(word, source, model)``. When a row with the same key
    already exists, its ``sentence`` and ``phonetic`` columns are overwritten.

    Args:
        word: The vocabulary word.
        phonetic: Phonetic transcription stored alongside the word.
        sentence: The example sentence text.
        source: Origin label of the sentence.
        model: Name of the model associated with the sentence.

    Raises:
        sqlite3.Error: if the database cannot be written.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute(
            '''
            INSERT INTO sentences (word, phonetic, sentence, source, model)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(word, source, model)
            DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
            ''',
            (word, phonetic, sentence, source, model),
        )
        conn.commit()
    finally:
        # Close even when the statement raises, so the handle is never leaked.
        conn.close()
49
-
50
-
51
def get_sources():
    """Return the names of the vocabulary sets found in ``DATA_DIR``.

    Every file in ``DATA_DIR`` whose name ends with ``.json`` counts as one
    vocabulary set; its name is the file name with that trailing suffix
    removed.

    Returns:
        The vocabulary-set names, sorted alphabetically.

    Raises:
        OSError: if ``DATA_DIR`` cannot be listed.
    """
    suffix = ".json"
    # Strip only the trailing suffix: splitting on ".json" would cut a name
    # such as "a.json.old.json" down to "a".
    # os.listdir() order is arbitrary, so sort for a stable result.
    return sorted(
        name[:-len(suffix)]
        for name in os.listdir(DATA_DIR)
        if name.endswith(suffix)
    )
56
-
57
-
58
def get_words_from_source(source):
    """Return every word contained in the vocabulary set named ``source``.

    The set is read from ``<DATA_DIR>/<source>.json``, which is parsed as JSON
    and iterated; the ``'word'`` value of each item is collected in file order.
    """
    json_file = os.path.join(DATA_DIR, f"{source}.json")
    with open(json_file, 'r', encoding='utf-8') as handle:
        entries = json.load(handle)

    collected = []
    for entry in entries:
        collected.append(entry['word'])
    return collected
 
 
1
  import sqlite3
2
  import os
3
  import json
4
+ import random
5
+ from ai_sentence import generate_sentence
6
+ from tqdm import tqdm
7
 
8
  # Directory holding the vocabulary "<source>.json" files and the SQLite file.
  DATA_DIR = "./data"
9
  # Path of the SQLite database that stores the example sentences.
  DB_PATH = os.path.join(DATA_DIR, "sentences.db")
10
 
11
 
12
# Model used to generate a sentence when the store has none for a word.
_FALLBACK_MODEL = "EleutherAI/pythia-410m"


def _html_text(value):
    """Return ``value`` as text that is safe to place inside an HTML element."""
    from html import escape

    return escape(str(value), quote=False)


def _pick_sentence(records):
    """Pick the sentence to show from non-empty ``(sentence, source, model)`` rows.

    The first row whose source is ``"tatoeba"`` is preferred; if there is no
    such row, or its sentence is empty, the first row's sentence is used.
    """
    preferred = next((row[0] for row in records if row[1] == "tatoeba"), "")
    return preferred or records[0][0]


def _render_card(word, phonetic, sentence):
    """Render one word/phonetic/sentence entry as an HTML card."""
    phonetic_text = _html_text(phonetic) if phonetic else '無'
    return f"""
    <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
        <strong>單字:</strong> {_html_text(word)} <br>
        <strong>音標:</strong> {phonetic_text} <br>
        <strong>句子:</strong> {_html_text(sentence)}
    </div>
    """


def get_words_with_sentences(source, n):
    """Draw random words from a vocabulary set and pair each with a sentence.

    Words are read from ``<DATA_DIR>/<source>.json``. For every drawn word the
    sentence store at ``DB_PATH`` is consulted first; when it has no sentence
    for the word, one is generated with ``generate_sentence`` and written back
    to the store under source ``"ai"``.

    Args:
        source: Name of the vocabulary set (the JSON file name without suffix).
        n: Number of words to draw. Values larger than the vocabulary are
            clamped to its size instead of failing.

    Returns:
        A ``(html, status)`` tuple of strings. On success ``html`` holds one
        card per word; on any failure both strings describe the error. The
        function never raises.
    """
    try:
        with open(os.path.join(DATA_DIR, f"{source}.json"), 'r', encoding='utf-8') as f:
            words = json.load(f)

        selected_words = random.sample(words, min(n, len(words)))

        cards = []
        # One connection for the whole batch, always closed on the way out.
        conn = sqlite3.connect(DB_PATH)
        try:
            for word_data in tqdm(selected_words, desc="處理單字"):
                word = word_data['word']
                # Phonetic is optional: the card already falls back to a
                # placeholder, so a missing key must not abort the batch.
                phonetic = word_data.get('phonetic')

                sentence_records = conn.execute(
                    'SELECT sentence, source, model FROM sentences WHERE word=?',
                    (word,),
                ).fetchall()

                if sentence_records:
                    sentence = _pick_sentence(sentence_records)
                else:
                    sentence = generate_sentence(word, _FALLBACK_MODEL)
                    conn.execute(
                        '''
                        INSERT INTO sentences (word, phonetic, sentence, source, model)
                        VALUES (?, ?, ?, ?, ?)
                        ON CONFLICT(word, source, model)
                        DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
                        ''',
                        (word, phonetic, sentence, "ai", _FALLBACK_MODEL),
                    )
                    conn.commit()

                cards.append(_render_card(word, phonetic, sentence))
        finally:
            conn.close()

        return "".join(cards), "✅ 抽單字 & 生成完成"

    except Exception as e:
        # UI boundary: report the failure instead of raising. The message is
        # escaped because it can echo caller-supplied text such as the path.
        return f"<p style='color:red;'>❌ 發生錯誤:{_html_text(e)}</p>", f"❌ 錯誤:{str(e)}"