"""Gradio app: draw random vocabulary words and generate example sentences.

Words are read from JSON files under ``DATA_DIR``; for each sampled word a
causal language model is prompted to write a beginner-level English sentence.
"""

import json
import os
import random

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model initialization. This runs once at import time (e.g. when the app
# starts on Hugging Face Spaces). The id can be overridden with the
# MODEL_NAME environment variable without editing the file.
# NOTE(review): confirm the default repo id exists on the Hub -- published
# Mistral instruct checkpoints appear to carry a version suffix (e.g. "-v0.2").
model_name = os.environ.get("MODEL_NAME", "mistralai/Mistral-7B-Instruct")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Directory that holds the word-list JSON files.
DATA_DIR = "./data"


def _resolve_data_path(source: str) -> str:
    """Return the path of ``<DATA_DIR>/<source>.json``, rejecting escapes.

    ``source`` comes from a free-text UI field, so it is treated as untrusted:
    the resolved path must stay inside ``DATA_DIR``.

    Raises:
        ValueError: if the resolved path would leave ``DATA_DIR``.
    """
    data_root = os.path.realpath(DATA_DIR)
    candidate = os.path.realpath(os.path.join(data_root, f"{source}.json"))
    if os.path.commonpath([data_root, candidate]) != data_root:
        # User-facing message: "invalid word-list name".
        raise ValueError(f"無效的單字庫名稱: {source}")
    return candidate


def _generate_sentence(word: str) -> str:
    """Ask the model for one beginner-level sentence that uses ``word``."""
    prompt = f"Write a simple English sentence using the word '{word}' suitable for beginners."
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=30)
    # A causal LM returns the prompt tokens followed by the continuation;
    # drop the prompt so only the newly generated text is decoded.
    prompt_length = inputs["input_ids"].shape[-1]
    completion = outputs[0][prompt_length:]
    return tokenizer.decode(completion, skip_special_tokens=True).strip()


def get_words_with_sentences(source: str = "common3000", n: int = 10) -> list:
    """Sample words from a word list and generate a sentence for each.

    Args:
        source: Base name (without ``.json``) of a word-list file in
            ``DATA_DIR``. Each entry is read as a mapping with a ``"word"``
            key and an optional ``"phonetic"`` key.
        n: How many words to draw. Non-integer numbers (as a UI number field
            may send) are truncated to an integer, and the value is capped at
            the size of the word list.

    Returns:
        A list of ``{"word", "phonetic", "sentence"}`` dicts. On any failure
        a single-element list ``[{"error": "..."}]`` is returned instead of
        raising, so the JSON output component can display the problem.
    """
    try:
        count = int(n)
        if count < 0:
            # User-facing message: "the number of words cannot be negative".
            raise ValueError("抽取數量不可為負數")

        # Load the requested word list.
        with open(_resolve_data_path(source), "r", encoding="utf-8") as f:
            words = json.load(f)

        # Draw without replacement; never ask for more than the list holds.
        selected_words = random.sample(words, min(count, len(words)))

        # Have the model write a sentence for every selected word.
        return [
            {
                "word": word_data["word"],
                "phonetic": word_data.get("phonetic", ""),
                "sentence": _generate_sentence(word_data["word"]),
            }
            for word_data in selected_words
        ]
    except Exception as e:
        # Deliberate catch-all at the UI boundary: report instead of crashing.
        return [{"error": f"發生錯誤: {str(e)}"}]


# Gradio interface configuration.
demo = gr.Interface(
    fn=get_words_with_sentences,
    inputs=[
        gr.Textbox(value="common3000", label="選擇單字庫"),
        # precision=0 makes the component deliver an integer to the callback.
        gr.Number(value=10, label="抽幾個單字", precision=0),
    ],
    outputs="json",
)

if __name__ == "__main__":
    demo.launch()