import gradio as gr
import json
import random
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

# Model initialisation: runs once at import time (e.g. when Hugging Face Spaces
# starts the app), loading the tokenizer and the causal LM weights.
# NOTE(review): published Mistral instruct checkpoints appear to carry a version
# suffix (e.g. "-v0.2") — confirm this repo id actually resolves on the Hub.
model_name = "mistralai/Mistral-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Directory that holds the "<source>.json" word-list files.
DATA_DIR = "./data"

def _is_safe_source_name(source):
    """Return True when *source* is a bare file stem with no path components."""
    # Reject empty names and names made only of dots ("." / "..").
    if not isinstance(source, str) or not source.strip("."):
        return False
    # Separators, drive colons and NUL would let the name escape DATA_DIR.
    return not any(ch in source for ch in ("/", "\\", ":", "\x00"))


# Core function: draw random words and have the language model write a
# sentence for each one.
def get_words_with_sentences(source="common3000", n=10):
    """Pick random words from a word list and generate an example sentence each.

    Reads ``<DATA_DIR>/<source>.json`` (a JSON list of objects with at least a
    ``"word"`` key), samples up to ``n`` entries without replacement, and asks
    the module-level ``model``/``tokenizer`` for a beginner-level sentence.

    Args:
        source: File stem of the word list inside ``DATA_DIR``. Must not
            contain path separators; it comes straight from a UI text box.
        n: How many words to draw. Accepts anything ``int()`` can convert
            (Gradio's ``Number`` component delivers floats). Values larger
            than the word list are clamped to its length.

    Returns:
        A list of ``{"word", "phonetic", "sentence"}`` dicts, or a one-element
        list ``[{"error": "..."}]`` if anything goes wrong. Errors are returned
        rather than raised so the UI can display them.
    """
    try:
        # Validate the inputs before touching the file system.
        if not _is_safe_source_name(source):
            raise ValueError("無效的單字庫名稱")
        try:
            count = int(n)  # random.sample() rejects floats such as 10.0
        except (TypeError, ValueError, OverflowError):
            raise ValueError("抽取數量必須是整數") from None
        if count < 0:
            raise ValueError("抽取數量不可為負數")

        # Load the requested word list.
        data_path = os.path.join(DATA_DIR, f"{source}.json")
        with open(data_path, 'r', encoding='utf-8') as f:
            words = json.load(f)

        # Never ask for more words than the list contains.
        count = min(count, len(words))
        selected_words = random.sample(words, count)
        results = []

        # Ask the model for one sentence per selected word.
        for word_data in selected_words:
            word = word_data['word']
            prompt = f"Write a simple English sentence using the word '{word}' suitable for beginners."

            inputs = tokenizer(prompt, return_tensors="pt")
            outputs = model.generate(**inputs, max_new_tokens=30)

            # generate() returns the prompt tokens followed by the new ones;
            # decode only the continuation so the prompt is not echoed back.
            prompt_length = inputs["input_ids"].shape[-1]
            sentence = tokenizer.decode(
                outputs[0][prompt_length:], skip_special_tokens=True
            ).strip()

            results.append({
                "word": word,
                # A missing phonetic should not abort the whole request.
                "phonetic": word_data.get("phonetic", ""),
                "sentence": sentence
            })

        return results

    except Exception as e:
        # UI boundary: report the failure as data instead of raising.
        return [{"error": f"發生錯誤: {str(e)}"}]

# Gradio interface configuration: a text box for the word-list name and a
# numeric field for how many words to draw; results are rendered as JSON.
demo = gr.Interface(
    fn=get_words_with_sentences,
    inputs=[
        gr.Textbox(value="common3000", label="選擇單字庫"),
        # precision=0 makes the component deliver an int instead of a float,
        # which is what the sampling step in the callback needs.
        gr.Number(value=10, label="抽幾個單字", precision=0)
    ],
    outputs="json"
)

# Start the web server (runs at import time, as the hosting platform expects).
demo.launch()