Spaces:
Running
Running
import sqlite3 | |
import os | |
import json | |
import random | |
from ai_sentence import generate_sentence | |
from tqdm import tqdm | |
# Directory that holds the "<source>.json" word lists and the sentence database.
DATA_DIR = "./data"
# SQLite database file containing the `sentences` table of example sentences.
DB_PATH = os.path.join(DATA_DIR, "sentences.db")
# Draw random words and attach an example sentence to each one, either looked
# up in the sentence database or generated by the language model.
def get_words_with_sentences(source, n):
    """Pick ``n`` random words from a word list and render them as HTML cards.

    The word list is read from ``<DATA_DIR>/<source>.json``; each entry must
    provide a ``'word'`` key and may provide a ``'phonetic'`` key. For every
    selected word the ``sentences`` table in ``DB_PATH`` is queried. A row
    whose source is ``"tatoeba"`` is preferred, otherwise the first row is
    used. When no row exists, a sentence is produced with
    ``generate_sentence`` and stored back in the table under source ``"ai"``.

    Args:
        source: Base name (without ``.json``) of the word-list file.
        n: Number of words to draw; coerced with ``int()``.

    Returns:
        A ``(html, status)`` tuple. On success ``html`` is the concatenation
        of one ``<div>`` card per word and ``status`` is a success message.
        On any failure both elements carry the error text instead; no
        exception propagates to the caller.
    """
    # Function-scope imports: the names are only needed here, and this keeps
    # the block independent of the file's top-level import list.
    import html
    from contextlib import closing

    model_name = "EleutherAI/pythia-410m"
    try:
        # Load every entry of the requested word list.
        with open(os.path.join(DATA_DIR, f"{source}.json"), 'r', encoding='utf-8') as f:
            words = json.load(f)

        # int() lets an integral float count (e.g. 3.0) work; random.sample
        # still raises ValueError when the count exceeds the list length.
        selected_words = random.sample(words, int(n))

        cards = []
        # One connection for the whole batch; closing() guarantees it is
        # released even if a query or the sentence generator raises.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            for word_data in tqdm(selected_words, desc="處理單字"):
                word = word_data['word']
                # The phonetic is optional: the card falls back to a placeholder.
                phonetic = word_data.get('phonetic')

                # Look the word up in the sentence database.
                rows = conn.execute(
                    'SELECT sentence, source, model FROM sentences WHERE word=?',
                    (word,),
                ).fetchall()

                if rows:
                    sentence = _pick_sentence(rows)
                else:
                    # Nothing stored yet: generate a sentence and save it.
                    sentence = generate_sentence(word, model_name)
                    conn.execute('''
                        INSERT INTO sentences (word, phonetic, sentence, source, model)
                        VALUES (?, ?, ?, ?, ?)
                        ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
                    ''', (word, phonetic, sentence, "ai", model_name))
                    # Commit per word so earlier results survive a later failure.
                    conn.commit()

                cards.append(_render_word_card(word, phonetic, sentence))

        return "".join(cards), "✅ 抽單字 & 生成完成"
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        # The HTML variant is escaped; the plain status string is left as-is.
        return f"<p style='color:red;'>❌ 發生錯誤:{html.escape(str(e))}</p>", f"❌ 錯誤:{str(e)}"


def _pick_sentence(rows):
    """Return the sentence of the first ``"tatoeba"`` row, else of the first row."""
    # Rows are (sentence, source, model) tuples; an empty tatoeba sentence
    # also falls back to the first row.
    tatoeba_sentence = next((row[0] for row in rows if row[1] == "tatoeba"), None)
    return tatoeba_sentence or rows[0][0]


def _render_word_card(word, phonetic, sentence):
    """Render one word, its phonetic and its sentence as an HTML ``<div>`` card."""
    import html

    # Escape every dynamic value: stored and model-generated text may contain
    # characters such as '<' or '&' that would otherwise corrupt the markup.
    safe_word = html.escape(str(word))
    safe_phonetic = html.escape(str(phonetic or '無'))
    safe_sentence = html.escape(str(sentence))
    return f"""
<div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
<strong>單字:</strong> {safe_word} <br>
<strong>音標:</strong> {safe_phonetic} <br>
<strong>句子:</strong> {safe_sentence}
</div>
"""