import sqlite3
import json
import random
import os
import re
from transformers import AutoModelForCausalLM, AutoTokenizer

# Initialize the GPT model and tokenizer
model_name = "EleutherAI/pythia-410m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Data folder and SQLite cache location
DATA_DIR = "./data"
DB_PATH = os.path.join(DATA_DIR, "sentences.db")
os.makedirs(DATA_DIR, exist_ok=True)  # make sure the data folder exists before first use
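# Expected layout of DATA_DIR (illustrative example; the actual word-list files are
# not shown here): each "<source>.json" is a JSON array of objects with at least
# "word" and "phonetic" keys, e.g. [{"word": "apple", "phonetic": "/ˈæpl/"}].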

# Create the sentence cache table if it does not exist
def init_db():
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    c.execute('''
    CREATE TABLE IF NOT EXISTS sentences (
        word TEXT PRIMARY KEY,
        phonetic TEXT,
        sentence TEXT,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
    )
    ''')
    conn.commit()
    conn.close()

# Scan the data folder to build the source menu (one entry per *.json word list)
def get_sources():
    files = os.listdir(DATA_DIR)
    sources = [f.split(".json")[0] for f in files if f.endswith(".json")]
    return sources

# Look up a cached sentence for a word
def get_sentence(word):
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    c.execute('SELECT word, phonetic, sentence FROM sentences WHERE word=?', (word,))
    result = c.fetchone()
    conn.close()
    return result

# Save (upsert) a sentence into SQLite
def save_sentence(word, phonetic, sentence):
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    c.execute('''
    INSERT INTO sentences (word, phonetic, sentence)
    VALUES (?, ?, ?)
    ON CONFLICT(word) DO UPDATE SET sentence=excluded.sentence, phonetic=excluded.phonetic
    ''', (word, phonetic, sentence))
    conn.commit()
    conn.close()

# Strip noise from the GPT-generated sentence (prompt echo, numbering, markdown)
def clean_sentence(output):
    output = output.split(":")[-1].strip()
    output = re.sub(r"^\d+\.\s*", "", output).strip()
    output = re.sub(r"Write.*?beginners\.", "", output, flags=re.IGNORECASE).strip()
    output = re.sub(r"\*\*?\d+\.*\*\*", "", output).strip()
    if not output.endswith("."):
        output += "."
    return output
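
# Example, given a hypothetical raw model output:
#   clean_sentence("A simple English sentence with the word 'apple': 1. The apple is red")
#   -> "The apple is red."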

# Core: draw random words, then use a cached sentence or generate one with GPT
def get_words_with_sentences(source, n):
    status = []
    display_result = ""

    try:
        # Load the word list for the selected source
        data_path = os.path.join(DATA_DIR, f"{source}.json")
        with open(data_path, 'r', encoding='utf-8') as f:
            words = json.load(f)

        # Randomly sample n words
        selected_words = random.sample(words, n)
        results = []

        for i, word_data in enumerate(selected_words):
            word = word_data['word']
            phonetic = word_data['phonetic']

            # Check the cache for an existing example sentence
            cached_result = get_sentence(word)
            if cached_result:
                sentence = cached_result[2]
                status.append(f"✅ {word} 已有例句,從句庫讀取")
            else:
                # If none is cached, generate one with GPT
                status.append(f"📝 正在生成第 {i + 1}/{n} 個單字 [{word}] 例句...")
                prompt = f"A simple English sentence with the word '{word}':"
                inputs = tokenizer(prompt, return_tensors="pt")
                outputs = model.generate(**inputs, max_new_tokens=30)
                sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)

                # Clean up the generated sentence
                sentence = clean_sentence(sentence)

                # Store it in the sentence cache
                save_sentence(word, phonetic, sentence)

            # Format the output as HTML
            display_result += f"""
            <div style="border-bottom: 1px solid #ddd; margin-bottom: 10px; padding-bottom: 5px;">
                <p><strong>📖 單字:</strong> {word}</p>
                <p><strong>🔤 音標:</strong> {phonetic}</p>
                <p><strong>✍️ 例句:</strong> {sentence}</p>
            </div>
            """

        status.append("✅ 完成!")
        return display_result, "\n".join(status)

    except Exception as e:
        status.append(f"❌ 發生錯誤: {str(e)}")
        return f"<p style='color:red;'>發生錯誤:{str(e)}</p>", "\n".join(status)

# Automatically create the table at startup
init_db()
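
# Minimal usage sketch, assuming DATA_DIR contains at least one "<source>.json"
# word list in the format described above; it prints the status log for a run
# of three randomly sampled words.
if __name__ == "__main__":
    sources = get_sources()
    if sources:
        html, log = get_words_with_sentences(sources[0], 3)
        print(log)
    else:
        print(f"No word lists (*.json) found in {DATA_DIR}")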