Spaces:

dayuian
/

VocabLine

Sleeping

File size: 1,051 Bytes

c2aeaf4
e65b5c7
c2aeaf4
e65b5c7
c2aeaf4
4eddfdb
 
c2aeaf4
 
e65b5c7
 
c2aeaf4
e65b5c7
c2aeaf4
e65b5c7
c2aeaf4
 
e65b5c7
 
c2aeaf4
e65b5c7
c2aeaf4
 
275d15c
e65b5c7
c2aeaf4
e65b5c7
c2aeaf4
275d15c
e65b5c7
 
 
 
 
c2aeaf4

from transformers import AutoModelForCausalLM, AutoTokenizer
import re

# Models that can be selected for sentence generation
MODEL_LIST = [
    "gpt2",
    "EleutherAI/pythia-410m",
]

# Cache of already-loaded models, so they are not reloaded on every call
MODEL_CACHE = {}

# Load a model (served from the cache when possible)
def load_model(model_name):
    """Return the ``(tokenizer, model)`` pair for ``model_name``.

    The pair is looked up in ``MODEL_CACHE`` first. On a miss, the tokenizer
    and then the causal-LM model are loaded with ``from_pretrained`` and the
    resulting tuple is stored in the cache under ``model_name``.
    """
    if model_name in MODEL_CACHE:
        return MODEL_CACHE[model_name]

    # Tuple elements are evaluated left to right: tokenizer first, then model.
    loaded = (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForCausalLM.from_pretrained(model_name),
    )
    MODEL_CACHE[model_name] = loaded
    return loaded

# Generate an AI example sentence
def generate_sentence(word, model_name):
    """Generate a short example sentence for ``word`` with a causal LM.

    Args:
        word: The vocabulary word to embed in the prompt.
        model_name: Model identifier passed to ``load_model``.

    Returns:
        The cleaned model continuation: only ASCII letters, digits, spaces
        and ``, . ! ?`` are kept, and the text always ends with ``.``, ``!``
        or ``?``. If nothing usable was generated the result is ``"."``.
    """
    tokenizer, model = load_model(model_name)

    prompt = f"Example sentence using '{word}':"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=20)

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Decode only the continuation instead of splitting the
    # full text on ":", which discarded everything before the *last* colon
    # (e.g. "It is 3:30 pm" became "30 pm").
    prompt_length = inputs["input_ids"].shape[1]
    sentence = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    # Clean up the generated sentence.
    # Turn newlines/tabs into single spaces first; the character filter below
    # would otherwise delete them and fuse the neighbouring words together.
    sentence = " ".join(sentence.split())
    sentence = re.sub(r'[^a-zA-Z0-9, .!?]', '', sentence).strip()
    # Only add a period when there is no terminal punctuation yet, so that
    # "Hello!" does not turn into "Hello!.".
    if not sentence.endswith((".", "!", "?")):
        sentence += "."
    return sentence