Update vocab.py
vocab.py CHANGED
@@ -1,63 +1,67 @@
 import sqlite3
 import os
 import json
+import random
+from ai_sentence import generate_sentence
+from tqdm import tqdm
 
 DATA_DIR = "./data"
 DB_PATH = os.path.join(DATA_DIR, "sentences.db")
 
 
-# …
-def …
-… (remaining old lines 11-63; the removed implementation is not preserved in this view)
+# Draw words & look up example sentences, or generate them with GPT
+def get_words_with_sentences(source, n):
+    try:
+        # Load every word entry from the word bank
+        with open(os.path.join(DATA_DIR, f"{source}.json"), 'r', encoding='utf-8') as f:
+            words = json.load(f)
+
+        selected_words = random.sample(words, n)
+
+        result_display = ""
+        for word_data in tqdm(selected_words, desc="處理單字"):
+            word = word_data['word']
+            phonetic = word_data['phonetic']
+
+            # Query the sentence database
+            conn = sqlite3.connect(DB_PATH)
+            c = conn.cursor()
+            c.execute('SELECT sentence, source, model FROM sentences WHERE word=?', (word,))
+            sentence_records = c.fetchall()
+            conn.close()
+
+            if sentence_records:
+                # Prefer a Tatoeba sentence when one exists
+                sentence = ""
+                for rec in sentence_records:
+                    if rec[1] == "tatoeba":
+                        sentence = rec[0]
+                        break
+                if not sentence:
+                    sentence = sentence_records[0][0]
+            else:
+                # Nothing in the sentence database: generate a sentence with the GPT model
+                sentence = generate_sentence(word, "EleutherAI/pythia-410m")
+                # Save the generated sentence back to the database
+                conn = sqlite3.connect(DB_PATH)
+                c = conn.cursor()
+                c.execute('''
+                    INSERT INTO sentences (word, phonetic, sentence, source, model)
+                    VALUES (?, ?, ?, ?, ?)
+                    ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
+                ''', (word, phonetic, sentence, "ai", "EleutherAI/pythia-410m"))
+                conn.commit()
+                conn.close()
+
+            result_display += f"""
+            <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
+                <strong>單字:</strong> {word} <br>
+                <strong>音標:</strong> {phonetic or '無'} <br>
+                <strong>句子:</strong> {sentence}
+            </div>
+            """
+
+        return result_display, "✅ 抽單字 & 生成完成"
+
+    except Exception as e:
+        return f"<p style='color:red;'>❌ 發生錯誤:{str(e)}</p>", f"❌ 錯誤:{str(e)}"
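
The upsert added in this commit targets ON CONFLICT(word, source, model), which only resolves if the sentences table has a unique constraint (or unique index) covering exactly those three columns. The table itself is created elsewhere in the Space, not in this diff, so the snippet below is only a minimal sketch of a schema the query would be compatible with; the column types and the surrounding setup code are assumptions, not part of the commit.

import os
import sqlite3

DATA_DIR = "./data"
DB_PATH = os.path.join(DATA_DIR, "sentences.db")

os.makedirs(DATA_DIR, exist_ok=True)

conn = sqlite3.connect(DB_PATH)
c = conn.cursor()
# Hypothetical schema sketch: the real table is created elsewhere in the Space.
# UNIQUE(word, source, model) is what makes the ON CONFLICT(word, source, model)
# clause in get_words_with_sentences valid.
c.execute('''
    CREATE TABLE IF NOT EXISTS sentences (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        word TEXT NOT NULL,
        phonetic TEXT,
        sentence TEXT NOT NULL,
        source TEXT NOT NULL,
        model TEXT,
        UNIQUE(word, source, model)
    )
''')
conn.commit()
conn.close()

With a table like this in place, generating a sentence for an unseen word inserts an "ai" row tagged with EleutherAI/pythia-410m, and writing the same (word, "ai", model) combination again updates that row in place instead of failing or creating a duplicate.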