VocabLine / vocab.py
dayuian's picture
Update vocab.py
4982f66 verified
raw
history blame
2.53 kB
import html
import json
import os
import random
import sqlite3
from contextlib import closing

from tqdm import tqdm

from ai_sentence import generate_sentence
# Directory (relative to the working directory) that holds the per-source
# word lists, read as "<source>.json", and the sentence database.
DATA_DIR = "./data"
# SQLite database file that is queried for, and updated with, example sentences.
DB_PATH = os.path.join(DATA_DIR, "sentences.db")
# Draw random words, then look up an example sentence for each one or generate it with a GPT-style model
def get_words_with_sentences(source, n):
    """Draw random words from a word list and attach an example sentence to each.

    The word list is read from ``<DATA_DIR>/<source>.json``. For every drawn
    word the ``sentences`` table in ``DB_PATH`` is consulted first; when it has
    no row for the word, a sentence is generated with ``generate_sentence`` and
    written back to the table so that later calls can reuse it.

    Args:
        source: Base name (without ``.json``) of the word-list file.
        n: Number of words to draw. It is coerced with ``int()`` and capped at
            the size of the word list, so asking for more words than exist
            returns every word instead of failing.

    Returns:
        A ``(html, status)`` tuple of strings. ``html`` contains one ``<div>``
        card per word; ``status`` is a short status message. Any exception is
        caught and reported through both strings rather than propagated.
    """
    model = "EleutherAI/pythia-410m"
    try:
        # Load every entry of the requested word list.
        with open(os.path.join(DATA_DIR, f"{source}.json"), 'r', encoding='utf-8') as f:
            words = json.load(f)

        # A negative count is still rejected by random.sample and reported below.
        count = min(int(n), len(words))
        selected_words = random.sample(words, count)

        cards = []
        # One connection for the whole batch; closing() guarantees it is
        # released even when a query or the sentence generator raises.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            for word_data in tqdm(selected_words, desc="處理單字"):
                word = word_data['word']
                # The card already has a placeholder for a missing phonetic,
                # so an absent key is treated like an empty value.
                phonetic = word_data.get('phonetic')

                records = conn.execute(
                    'SELECT sentence, source FROM sentences WHERE word=?',
                    (word,),
                ).fetchall()

                if records:
                    sentence = _choose_stored_sentence(records)
                else:
                    # Nothing stored for this word: generate a sentence and cache it.
                    sentence = generate_sentence(word, model)
                    # NOTE(review): the upsert presumably relies on a unique
                    # constraint over (word, source, model) - confirm against
                    # the table schema.
                    conn.execute('''
                        INSERT INTO sentences (word, phonetic, sentence, source, model)
                        VALUES (?, ?, ?, ?, ?)
                        ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
                    ''', (word, phonetic, sentence, "ai", model))
                    conn.commit()

                cards.append(_render_word_card(word, phonetic, sentence))

        return "".join(cards), "✅ 抽單字 & 生成完成"
    except Exception as e:
        # UI boundary: report the failure instead of raising. The message is
        # escaped only where it is embedded in HTML.
        return (
            f"<p style='color:red;'>❌ 發生錯誤:{html.escape(str(e), quote=False)}</p>",
            f"❌ 錯誤:{str(e)}",
        )


def _choose_stored_sentence(records):
    """Pick one sentence from the non-empty ``(sentence, source)`` rows of a word.

    The first row whose source is ``"tatoeba"`` wins; if there is none, or its
    sentence is empty, the sentence of the first row is used.
    """
    preferred = next((text for text, origin in records if origin == "tatoeba"), "")
    return preferred or records[0][0]


def _render_word_card(word, phonetic, sentence):
    """Return the HTML card for one word, escaping every interpolated value."""
    # Stored and generated text may contain '<' or '&'; escape it so it cannot
    # break or inject markup into the rendered page.
    shown_phonetic = html.escape(str(phonetic), quote=False) if phonetic else '無'
    return f"""
<div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
<strong>單字:</strong> {html.escape(str(word), quote=False)} <br>
<strong>音標:</strong> {shown_phonetic} <br>
<strong>句子:</strong> {html.escape(str(sentence), quote=False)}
</div>
"""