File size: 3,598 Bytes
1e165e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import html
import os
import random
import sqlite3

from tqdm import tqdm

from ai_sentence import generate_sentence
from vocab import get_words_from_source, get_word_info

DATA_DIR = "./data"
DB_PATH = os.path.join(DATA_DIR, "sentences.db")


# Initialize the database (create the table).
def init_db():
    """Create the ``sentences`` table in the SQLite file at ``DB_PATH`` if missing.

    Rows are keyed by ``(word, source, model)``; ``created_at`` defaults to the
    insertion timestamp. Calling this repeatedly is harmless because the
    statement uses ``CREATE TABLE IF NOT EXISTS``.
    """
    create_table_sql = '''
    CREATE TABLE IF NOT EXISTS sentences (
        word TEXT,
        phonetic TEXT,
        sentence TEXT,
        source TEXT,
        model TEXT,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (word, source, model)
    )
    '''
    # sqlite3.connect cannot create a database file inside a directory that
    # does not exist yet, so make sure the data directory is there first.
    os.makedirs(DATA_DIR, exist_ok=True)
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute(create_table_sql)
        conn.commit()
    finally:
        # Release the file handle even when the statement fails.
        conn.close()


# Look up every stored example sentence for a single word.
def get_sentences_by_word(word):
    """Return all rows of the ``sentences`` table whose ``word`` column equals *word*.

    Args:
        word: The word to look up; passed to SQLite as a bound parameter.

    Returns:
        A list of ``(word, phonetic, sentence, source, model)`` tuples, empty
        when no row matches.

    Raises:
        sqlite3.Error: If the query fails (for example, the table has not been
            created yet). The connection is closed before the error propagates.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.execute(
            'SELECT word, phonetic, sentence, source, model FROM sentences WHERE word=?',
            (word,),
        )
        return cursor.fetchall()
    finally:
        # Close on both the success and the failure path so no handle leaks.
        conn.close()


# Store a sentence in SQLite.
def save_sentence(word, phonetic, sentence, source, model):
    """Insert a sentence row, or update it when the key already exists.

    The row key is ``(word, source, model)``. On a key conflict only the
    ``sentence`` and ``phonetic`` columns are overwritten with the new values.

    Args:
        word: The vocabulary word the sentence illustrates.
        phonetic: Phonetic transcription stored alongside the word.
        sentence: The example sentence text.
        source: Where the sentence came from (part of the row key).
        model: Which model produced it (part of the row key).

    Raises:
        sqlite3.Error: If the statement or the commit fails. The connection is
            closed before the error propagates.
    """
    upsert_sql = '''
    INSERT INTO sentences (word, phonetic, sentence, source, model)
    VALUES (?, ?, ?, ?, ?)
    ON CONFLICT(word, source, model) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
    '''
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute(upsert_sql, (word, phonetic, sentence, source, model))
        conn.commit()
    finally:
        # Close on both the success and the failure path so no handle leaks.
        conn.close()


# Pick which stored row to display: a Tatoeba row that has text, else the first row.
def _pick_stored_record(records):
    for record in records:
        # Row layout is (word, phonetic, sentence, source, model).
        if record[3] == "tatoeba" and record[2]:
            return record
    return records[0]


# Render one word entry as an HTML card, escaping every dynamic value.
def _render_entry(word, phonetic, sentence, source_used, model_used):
    model_note = f"({html.escape(str(model_used))})" if model_used else ""
    phonetic_text = html.escape(str(phonetic)) if phonetic else '無'
    return f"""
            <div style="margin-bottom: 10px; padding: 8px; border-left: 4px solid #4CAF50; background-color: #f9f9f9;">
                <strong>單字:</strong> {html.escape(str(word))} <br>
                <strong>音標:</strong> {phonetic_text} <br>
                <strong>句子:</strong> {html.escape(str(sentence))} <br>
                <strong>來源:</strong> {html.escape(str(source_used))} {model_note}
            </div>
            """


# Randomly draw words, then fetch a stored sentence or generate one with the AI model.
def get_words_with_sentences(source, n):
    """Draw random words from *source* and return them with example sentences as HTML.

    For each drawn word the sentence store is consulted first. A stored
    Tatoeba sentence is preferred; otherwise the first stored row is used. The
    displayed source and model always come from the same row as the displayed
    sentence. When nothing is stored, a sentence is generated with the AI
    model and written back to the store.

    Args:
        source: Identifier handed to ``get_words_from_source`` and ``get_word_info``.
        n: How many words to draw. If it exceeds the number of available
            words, every available word is drawn instead of failing.

    Returns:
        A ``(html, status)`` tuple of strings. On any exception the HTML is a
        red error paragraph and the status carries the same error text; the
        exception is not re-raised.
    """
    ai_model = "EleutherAI/pythia-410m"
    try:
        words = get_words_from_source(source)
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request at what is available.
        count = min(n, len(words))
        selected_words = random.sample(words, count)

        entries = []
        for word_data in tqdm(selected_words, desc="處理單字"):
            word = word_data['word']
            phonetic = word_data['phonetic']

            # Consult the sentence store first.
            sentence_records = get_sentences_by_word(word)

            if sentence_records:
                # Take sentence, source and model from the SAME row so the
                # attribution shown matches the sentence shown.
                chosen = _pick_stored_record(sentence_records)
                sentence = chosen[2]
                source_used = chosen[3]
                model_used = chosen[4]
            else:
                # Nothing stored: generate a sentence with the AI model.
                sentence = generate_sentence(word, ai_model)
                source_used = "ai"
                model_used = ai_model

                # Look the phonetic up again if the word list did not carry one,
                # so the stored row is not missing it.
                if not phonetic:
                    word_info = get_word_info(source, word)
                    phonetic = word_info['phonetic'] if word_info else ''

                # Write the generated sentence back to the store.
                save_sentence(word, phonetic, sentence, source_used, model_used)

            entries.append(_render_entry(word, phonetic, sentence, source_used, model_used))

        return "".join(entries), f"✅ 成功抽取 {count} 個單字 & 句子"

    except Exception as e:
        # UI boundary: report the failure instead of raising. The message is
        # escaped for the HTML pane and left as plain text for the status line.
        message = str(e)
        return f"<p style='color:red;'>❌ 發生錯誤:{html.escape(message)}</p>", f"❌ 錯誤:{message}"