import json
import random
import os
import re
from transformers import AutoModelForCausalLM, AutoTokenizer

# Initialize the model once at module load so it isn't reloaded on every request
model_name = "EleutherAI/pythia-410m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
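# Optional inference hygiene: eval() disables dropout; generate() already runs
# without gradients, so this mainly keeps repeated generations consistent.
model.eval()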

DATA_DIR = "./data"

def get_sources():
    """Scan the data directory and return the names of all available word banks."""
    files = os.listdir(DATA_DIR)
    sources = [os.path.splitext(f)[0] for f in files if f.endswith(".json")]
    return sources
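
# A minimal sketch of the word-bank data this module assumes: each
# data/<source>.json file holds a list of objects with at least "word" and
# "phonetic" keys (field names inferred from how they are read below):
#
#   [
#     {"word": "apple", "phonetic": "/ˈæp.əl/"},
#     {"word": "river", "phonetic": "/ˈrɪv.ɚ/"}
#   ]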


def clean_sentence(output):
    """Clean up a model-generated sentence, stripping prompt echoes and noise."""
    # Fallback in case an echo of the instruction prompt survives decoding
    output = re.sub(r"Use the word.*?sentence\.", "", output, flags=re.IGNORECASE).strip()
    # Drop markdown-style numbering such as **1.**
    output = re.sub(r"\*\*?\d+\.*\*\*", "", output).strip()
    if not output.endswith("."):
        output += "."
    return output
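
# Illustrative only: a hypothetical raw model output and its cleaned form.
#
#   raw = "Use the word 'apple' in a simple English sentence suitable for beginners. Output only the sentence. **1.** I eat an apple"
#   clean_sentence(raw)  # -> "I eat an apple."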


def get_words_with_sentences(source, n):
    """Sample n words from a word bank and generate an example sentence for each; return the rendered HTML and a status log."""
    status = []
    display_result = ""
    try:
        # Load the word-bank data
        data_path = os.path.join(DATA_DIR, f"{source}.json")
        with open(data_path, 'r', encoding='utf-8') as f:
            words = json.load(f)

        # Randomly sample n entries, capped at the bank size so a small bank doesn't raise ValueError
        selected_words = random.sample(words, min(n, len(words)))
        results = []

        for i, word_data in enumerate(selected_words):
            status.append(f"Generating sentence {i + 1}/{n} for word [{word_data['word']}]...")
            word = word_data['word']

            # Sentence-generation prompt for the language model
            prompt = f"Use the word '{word}' in a simple English sentence suitable for beginners. Output only the sentence."

            inputs = tokenizer(prompt, return_tensors="pt")
            outputs = model.generate(**inputs, max_new_tokens=30, pad_token_id=tokenizer.eos_token_id)
            # Decode only the newly generated tokens so the prompt isn't echoed back
            new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
            sentence = tokenizer.decode(new_tokens, skip_special_tokens=True)

            clean_output = clean_sentence(sentence)

            results.append({
                "word": word,
                "phonetic": word_data["phonetic"],
                "sentence": clean_output
            })

            # Render this entry as an HTML card
            display_result += f"""
            <div style="border-bottom: 1px solid #ddd; margin-bottom: 10px; padding-bottom: 5px;">
                <p><strong>📖 Word:</strong> {word}</p>
                <p><strong>🔤 Phonetic:</strong> {word_data['phonetic']}</p>
                <p><strong>✍️ Sentence:</strong> {clean_output}</p>
            </div>
            """

        status.append("✅ 完成!")

        # Return the rendered HTML along with the status log
        return display_result, "\n".join(status)

    except Exception as e:
        status.append(f"❌ Error: {str(e)}")
        return f"<p style='color:red;'>An error occurred: {str(e)}</p>", "\n".join(status)