VocabLine / app.py
dayuian's picture
Create app.py
f6bff5d verified
raw
history blame
1.69 kB
import gradio as gr
import json
import random
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
# Model initialization: these statements run at import time, i.e. when the
# Hugging Face Space starts the app.
# NOTE(review): confirm this repo id resolves on the Hub -- the published
# Mistral instruct checkpoints appear to carry a version suffix
# (e.g. "-v0.2"). TODO confirm.
model_name = "mistralai/Mistral-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Directory that holds the word-list files; each list is read from
# "<DATA_DIR>/<source>.json".
DATA_DIR = "./data"
# Core function: draw random words and generate an example sentence for each.
def _resolve_word_list_path(source):
    """Return the absolute path of the word list named *source*.

    The list is expected at ``<DATA_DIR>/<source>.json``.

    Raises:
        ValueError: if the resolved path lies outside ``DATA_DIR`` (for
            example when *source* contains ``../``). *source* comes from a
            free-text UI field, so it must not be able to reach other files.
    """
    data_root = os.path.realpath(DATA_DIR)
    data_path = os.path.realpath(os.path.join(data_root, f"{source}.json"))
    if os.path.commonpath([data_root, data_path]) != data_root:
        raise ValueError(f"invalid word list name: {source!r}")
    return data_path


def _generate_sentence(word):
    """Ask the language model for a beginner-level sentence using *word*."""
    prompt = f"Write a simple English sentence using the word '{word}' suitable for beginners."
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=30)
    # A causal LM returns the prompt tokens followed by the continuation.
    # Decode only the continuation so the prompt is not echoed back to the user.
    prompt_length = inputs["input_ids"].shape[-1]
    completion = outputs[0][prompt_length:]
    return tokenizer.decode(completion, skip_special_tokens=True).strip()


def get_words_with_sentences(source="common3000", n=10):
    """Pick random words from a word list and generate a sentence for each.

    Args:
        source: Name of the word list; the data is read from
            ``<DATA_DIR>/<source>.json``. The file is expected to contain a
            JSON array of objects with a ``"word"`` key and, optionally, a
            ``"phonetic"`` key.
        n: How many words to draw. Floats are truncated to an integer (UI
            number widgets may deliver floats) and the value is clamped to
            the range ``0 .. len(words)``.

    Returns:
        A list of ``{"word", "phonetic", "sentence"}`` dicts, one per drawn
        word. On any failure, a single-element list ``[{"error": message}]``
        is returned instead of raising, so the UI always receives JSON.
    """
    try:
        # Load the requested word list.
        with open(_resolve_word_list_path(source), 'r', encoding='utf-8') as f:
            words = json.load(f)

        # random.sample rejects floats, negative sizes and sizes larger than
        # the population, so normalise the requested count first.
        count = max(0, min(int(n), len(words)))
        selected_words = random.sample(words, count)

        results = []
        # Ask the language model for one example sentence per word.
        for word_data in selected_words:
            word = word_data['word']
            results.append({
                "word": word,
                # A missing phonetic entry should not abort the whole batch.
                "phonetic": word_data.get("phonetic", ""),
                "sentence": _generate_sentence(word),
            })
        return results
    except Exception as e:
        # UI boundary: report the problem as data rather than crashing.
        return [{"error": f"發生錯誤: {str(e)}"}]
# Gradio interface configuration: a text box selects the word list and a
# number field selects how many words to draw; results are shown as JSON.
demo = gr.Interface(
    fn=get_words_with_sentences,
    inputs=[
        gr.Textbox(value="common3000", label="選擇單字庫"),
        # precision=0 makes Gradio round the value and pass it as an int;
        # the callback uses it as a sample size, which must be an integer.
        gr.Number(value=10, precision=0, label="抽幾個單字"),
    ],
    outputs="json",
)
demo.launch()