import os
import platform
from functools import lru_cache

import gradio as gr
import nltk
import openai  # used for the optional external-API fallback (commented out below)
import spaces
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM

device = "cuda"
MODEL_NAME = "ByteDance-Seed/Seed-X-PPO-7B"


def load_model():
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="bfloat16").to(device)
    print(f"Model loaded on {device}")
    return model


model = load_model()

# Loading the tokenizer once, because re-loading it takes about 1.5 seconds each time
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


@lru_cache(maxsize=100)
def translate(text: str):
    return _translate(text)


# Only assign the GPU if the cache is not used
@spaces.GPU
def _translate(text: str):
    input_tokens = (
        tokenizer(text, return_tensors="pt")
        .input_ids[0]
        .cpu()
        .numpy()
        .tolist()
    )
    translated_chunk = model.generate(
        input_ids=torch.tensor([input_tokens]).to(device),
        max_length=len(input_tokens) + 2048,
        num_return_sequences=1,
    )
    full_output = tokenizer.decode(translated_chunk[0], skip_special_tokens=True).strip()
    print(full_output)
    return full_output


# Alternative implementation that calls an external OpenAI-compatible API
# (assumes openai_client can be created from an API key in the environment), e.g.:
# def _translate(text: str, src_lang: str, tgt_lang: str):
#     prompt = f"Translate the following text from {src_lang} to {tgt_lang}. Direct output translation result without any explanation:\n\n{text}"
#     key = os.getenv('key')
#     openai_client = openai.OpenAI(base_url="https://ssapi.cppbear.site/v1", api_key=key)
#     response = openai_client.chat.completions.create(
#         model="tbai.xin-dpsk-deepseek-v3",  # e.g. gpt-3.5-turbo or another compatible model
#         messages=[{"role": "user", "content": prompt}],
#         max_tokens=30240,
#         temperature=0.0,
#     )
#     print(response)
#     return response.choices[0].message.content.strip()


description = """