import logging
import os
from typing import Optional

import requests
from fastapi import FastAPI
from llama_cpp import Llama

app = FastAPI()

# Telegram credentials are read from the environment; tokens must never be hardcoded in source.
TELEGRAM_TOKEN = os.getenv('TELEGRAM_TOKEN')
TELEGRAM_CHAT_ID = os.getenv('TELEGRAM_CHAT_ID')

# The model is prompted as raw text, so the chat template is filled in manually.
CHAT_TEMPLATE = '<|system|> {system_prompt}<|end|><|user|> {prompt}<|end|><|assistant|>'.strip()
SYSTEM_PROMPT = '{prompt}'
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
logger = logging.getLogger(__name__)
logger.info("Starting up... 🥳🥳🥳")

REPO_ID = "Vikhrmodels/Vikhr-Qwen-2.5-1.5B-Instruct-GGUF"
FILENAME = "Vikhr-Qwen-2.5-1.5b-Instruct-Q8_0.gguf"
# Model initialization
try:
    logger.info(f"Loading model {FILENAME}...")
    # Loading the model from local storage instead:
    # llm = Llama(
    #     model_path=f"./models/{FILENAME}",
    #     verbose=False,
    #     n_gpu_layers=-1,
    #     n_ctx=1512,
    # )
    # Sampling parameters (temperature, top_k, top_p) are passed per call in
    # generate_response(); the Llama constructor only takes load-time options.
    LLM = Llama.from_pretrained(
        repo_id=REPO_ID,
        filename=FILENAME,
        n_gpu_layers=-1,  # offload all layers to the GPU when one is available
        n_ctx=1512,       # context window size
    )
except Exception as e:
    logger.error(f"Model loading error: {str(e)}")
    raise
# Build the prompt for the model
def create_prompt(text: str) -> Optional[str]:
    try:
        user_input = text
        logger.info(f"Message received: {user_input}")
        # Fill in the chat template
        return CHAT_TEMPLATE.format(
            system_prompt=SYSTEM_PROMPT.format(prompt='The answer must be accurate, concise and humorous.'),
            prompt=user_input,
        )
    except Exception as e:
        logger.error(e)
        return None
def generate_response(prompt: str) -> Optional[str]:
    try:
        # Run generation; sampling parameters belong here, not in the constructor
        output = LLM(
            prompt,
            max_tokens=64,
            temperature=0.3,
            top_k=50,
            top_p=0.95,
            stop=["<|end|>"],
        )
        logger.info('Output:')
        logger.info(output)
        response = output['choices'][0]['text']
        # Return the reply
        if response:
            return response
        return 'An error occurred while processing the request'
    except Exception as e:
        logger.error(f"Message processing error: {str(e)}")
        return None
async def send_to_telegram(message):
    # NOTE: requests is synchronous and blocks the event loop; acceptable for a
    # single low-traffic endpoint, but an async HTTP client scales better.
    url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
    payload = {
        "chat_id": TELEGRAM_CHAT_ID,
        "text": message
    }
    response = requests.post(url, json=payload, timeout=10)
    return "Message sent!" if response.ok else "Error!"
@app.get("/")
async def root():
    await send_to_telegram('aaaaaaaaaaa')
    return {"message": "Hello World"}
@app.post("/webhook")
async def predict(response):
    # Generate a reply with the model
    logger.info('post/webhook -> response:')
    logger.info(response)
    # prompt = create_prompt(text)
    # response = generate_response(prompt)
    return {"response": response}