GPTfree api
Create app.py
5488267 verified
raw
history blame
1.77 kB
from vllm import LLM
from vllm.sampling_params import SamplingParams
from huggingface_hub import hf_hub_download
from datetime import datetime, timedelta
model_name = "mistralai/Pixtral-Large-Instruct-2411"
def load_system_prompt(repo_id: str, filename: str) -> str:
file_path = hf_hub_download(repo_id=repo_id, filename=filename)
with open(file_path, 'r') as file:
system_prompt = file.read()
today = datetime.today().strftime('%Y-%m-%d')
yesterday = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
model_name = repo_id.split("/")[-1]
return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
SYSTEM_PROMPT = load_system_prompt(model_name, "SYSTEM_PROMPT.txt")
image_url = "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/europe.png"
messages = [
{"role": "system", "content": SYSTEM_PROMPT},
{
"role": "user",
"content": [
{
"type": "text",
"text": "Which of the depicted countries has the best food? Which the second and third and fourth? Name the country, its color on the map and one its city that is visible on the map, but is not the capital. Make absolutely sure to only name a city that can be seen on the map.",
},
{"type": "image_url", "image_url": {"url": image_url}},
],
},
]
sampling_params = SamplingParams(max_tokens=512)
# note that running this model on GPU requires over 300 GB of GPU RAM
llm = LLM(model=model_name, config_format="mistral", load_format="mistral", tokenizer_mode="mistral", tensor_parallel_size=8, limit_mm_per_prompt={"image": 4})
outputs = llm.chat(messages, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)