Spaces:
Sleeping
Sleeping
import gradio as gr | |
import json | |
import random | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import os | |
import re | |
# Model setup: the tokenizer and the causal language model are loaded once,
# at import time, and shared by every request.
model_name = "EleutherAI/pythia-410m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Directory that holds the word-list JSON files.
DATA_DIR = "./data"
# Scan the data directory to build the list of selectable word sources.
def get_sources():
    """Return the names of the word lists available in ``DATA_DIR``.

    A word list is any directory entry whose extension is exactly ``.json``;
    its name is the file name with that final extension removed (so
    ``x.json.json`` yields ``x.json`` rather than being cut at the first
    ``.json``).

    Returns:
        The source names sorted alphabetically, so the menu order does not
        depend on the arbitrary order returned by ``os.listdir``.

    Raises:
        OSError: if ``DATA_DIR`` does not exist or cannot be read.
    """
    return sorted(
        stem
        for stem, ext in map(os.path.splitext, os.listdir(DATA_DIR))
        if ext == ".json"
    )
# Remove noise from a model-generated sentence.
# Patterns are compiled once because clean_sentence runs for every word.
_PROMPT_ECHO_RE = re.compile(r"Write.*?beginners\.", flags=re.IGNORECASE)
_LIST_MARKER_RE = re.compile(r"\*\*?\d+\.*\*\*")
_SENTENCE_ENDINGS = (".", "!", "?")


def clean_sentence(output):
    """Strip prompt echoes and list markers, then ensure terminal punctuation.

    Args:
        output: Raw text decoded from the language model.

    Returns:
        The cleaned text. Any "Write ... beginners." span and markdown-style
        numbered markers such as ``**1.**`` are removed. A period is appended
        only when the text does not already end with ``.``, ``!`` or ``?``.
        An empty string is returned when nothing is left after cleaning
        (instead of a lone ".").
    """
    output = _PROMPT_ECHO_RE.sub("", output).strip()
    output = _LIST_MARKER_RE.sub("", output).strip()
    if not output:
        return ""
    if not output.endswith(_SENTENCE_ENDINGS):
        output += "."
    return output
# Core function: sample words and generate an example sentence for each.
def get_words_with_sentences(source, n):
    """Pick random words from a word list and generate a sentence for each.

    Args:
        source: Name of the word list; ``<DATA_DIR>/<source>.json`` is loaded
            and is expected to contain a list of objects with a ``"word"``
            key and, optionally, a ``"phonetic"`` key.
        n: How many words to draw. Accepts an int or a float (the Gradio
            number input delivers floats by default); the value is truncated
            to an int and clamped to ``0 .. len(words)`` so an oversized or
            negative request no longer fails.

    Returns:
        A ``(results, status)`` tuple. ``results`` is a list of dicts with the
        keys ``"word"``, ``"phonetic"`` and ``"sentence"``; ``status`` is a
        list of progress messages. On any error ``results`` is empty and the
        last status entry describes the failure.
    """
    status = []
    try:
        data_path = os.path.join(DATA_DIR, f"{source}.json")
        with open(data_path, 'r', encoding='utf-8') as f:
            words = json.load(f)

        # random.sample requires an int that is neither negative nor larger
        # than the population.
        count = max(0, min(int(n), len(words)))
        selected_words = random.sample(words, count)

        results = []
        for i, word_data in enumerate(selected_words, start=1):
            word = word_data['word']
            status.append(f"正在生成第 {i}/{count} 個單字 [{word}] 例句...")

            prompt = f"Use the word '{word}' in a simple English sentence suitable for beginners. Output only the sentence."
            inputs = tokenizer(prompt, return_tensors="pt")
            outputs = model.generate(**inputs, max_new_tokens=30)

            # A causal LM returns the prompt tokens followed by the newly
            # generated ones; decode only the continuation so the prompt
            # text does not end up inside the example sentence.
            prompt_len = inputs["input_ids"].shape[-1]
            sentence = tokenizer.decode(
                outputs[0][prompt_len:], skip_special_tokens=True
            )

            results.append({
                "word": word,
                # A missing phonetic entry should not abort the whole batch.
                "phonetic": word_data.get("phonetic", ""),
                "sentence": clean_sentence(sentence),
            })

        status.append("✅ 完成!")
        return results, status
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        status.append(f"❌ 發生錯誤: {str(e)}")
        return [], status
# Gradio interface
# Scan the data directory once and reuse the result for both the choices and
# the default selection.
_source_choices = get_sources()
# Preselect "common3000" only when that word list is actually present;
# otherwise fall back to the first available list (or no selection at all).
if "common3000" in _source_choices:
    _default_source = "common3000"
else:
    _default_source = _source_choices[0] if _source_choices else None

demo = gr.Interface(
    fn=get_words_with_sentences,
    inputs=[
        # NOTE(review): confirm `show_clear_button` is accepted by the
        # installed Gradio version's Dropdown.
        gr.Dropdown(
            choices=_source_choices,
            value=_default_source,
            label="選擇單字庫",
            interactive=True,
            show_clear_button=False,
        ),
        # precision=0 makes the component deliver an int instead of a float.
        gr.Number(value=10, precision=0, label="抽幾個單字"),
    ],
    outputs=[
        gr.JSON(label="生成結果"),
        gr.JSON(label="生成進度"),
    ],
)
demo.launch()