File size: 2,532 Bytes
dd2233b
1112df1
4f885c6
4982f66
 
 
1112df1
 
dd2233b
 
f2efbc0
4982f66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import html
import json
import os
import random
import sqlite3
from contextlib import closing

from tqdm import tqdm

from ai_sentence import generate_sentence

# Directory that holds the word-list JSON files read by get_words_with_sentences.
DATA_DIR = "./data"
# SQLite database file that get_words_with_sentences queries and writes to.
DB_PATH = os.path.join(DATA_DIR, "sentences.db")


# Render one vocabulary entry as an HTML card with every dynamic value escaped.
def _render_word_card(word, phonetic, sentence):
    """Return the HTML snippet that displays a single word.

    The values originate from JSON files, the sentence database, or a
    language model, so they are HTML-escaped before being embedded in markup.
    A falsy ``phonetic`` is shown as the placeholder '無'.
    """
    safe_word = html.escape(str(word), quote=False)
    safe_phonetic = html.escape(str(phonetic or '無'), quote=False)
    safe_sentence = html.escape(str(sentence), quote=False)
    return f"""
            <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
                <strong>單字:</strong> {safe_word} <br>
                <strong>音標:</strong> {safe_phonetic} <br>
                <strong>句子:</strong> {safe_sentence}
            </div>
            """


# Look up an example sentence for a word; generate and store one on a miss.
def _find_or_create_sentence(conn, word, phonetic, model):
    """Return an example sentence for ``word`` using the open connection.

    Rows whose ``source`` column equals "tatoeba" are preferred; if none has
    a non-empty sentence, the first row returned by the query is used. When
    the table has no row for the word, ``generate_sentence(word, model)`` is
    called and its result is upserted with source "ai" and committed.
    """
    records = conn.execute(
        'SELECT sentence, source, model FROM sentences WHERE word=?', (word,)
    ).fetchall()

    if records:
        # Prefer Tatoeba; fall back to the first stored row otherwise.
        tatoeba = next(
            (text for text, origin, _ in records if origin == "tatoeba"), ""
        )
        return tatoeba or records[0][0]

    # Nothing stored for this word: generate a sentence and save it back.
    sentence = generate_sentence(word, model)
    conn.execute('''
        INSERT INTO sentences (word, phonetic, sentence, source, model)
        VALUES (?, ?, ?, ?, ?)
        ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
    ''', (word, phonetic, sentence, "ai", model))
    conn.commit()
    return sentence


# Draw random words, then look up an example sentence or generate one.
def get_words_with_sentences(source, n, model="EleutherAI/pythia-410m"):
    """Pick ``n`` random words from a word list and pair each with a sentence.

    Args:
        source: Base name of the JSON word list; the file read is
            ``<DATA_DIR>/<source>.json``. Each entry must provide a 'word'
            key; 'phonetic' is optional.
        n: Number of words to draw with ``random.sample``.
        model: Model name passed to ``generate_sentence`` and stored with any
            newly generated sentence. Defaults to the previously hard-coded
            value, so existing two-argument callers are unaffected.

    Returns:
        A ``(html, status)`` tuple. On success ``html`` is the concatenated
        word cards and ``status`` a completion message. On any exception the
        tuple holds an error paragraph and an error status instead; the
        exception is not re-raised.
    """
    try:
        # Load every entry of the requested word list.
        with open(os.path.join(DATA_DIR, f"{source}.json"), 'r', encoding='utf-8') as f:
            words = json.load(f)

        selected_words = random.sample(words, n)

        cards = []
        # One connection for the whole batch; closing() guarantees it is
        # released even when a query or the sentence generator raises.
        with closing(sqlite3.connect(DB_PATH)) as conn:
            for word_data in tqdm(selected_words, desc="處理單字"):
                word = word_data['word']
                # A missing phonetic is tolerated; the card shows a placeholder.
                phonetic = word_data.get('phonetic')
                sentence = _find_or_create_sentence(conn, word, phonetic, model)
                cards.append(_render_word_card(word, phonetic, sentence))

        return "".join(cards), "✅ 抽單字 & 生成完成"

    except Exception as e:
        # UI boundary: report the failure as HTML plus a plain status string.
        safe_error = html.escape(str(e), quote=False)
        return f"<p style='color:red;'>❌ 發生錯誤:{safe_error}</p>", f"❌ 錯誤:{str(e)}"