Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	
		Aleksandr Maiorov
		
	committed on
		
		
					Commit 
							
							·
						
						a844653
	
1
								Parent(s):
							
							534b1f4
								
тестирование
Browse files
    	
        app.py
    CHANGED
    
    | @@ -1,26 +1,85 @@ | |
|  | |
| 1 | 
             
            from fastapi import FastAPI
         | 
| 2 | 
             
            from llama_cpp import Llama
         | 
| 3 |  | 
| 4 | 
             
            app = FastAPI()
         | 
| 5 |  | 
| 6 | 
            -
             | 
| 7 |  | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
            	filename="Phi-3.5-mini-instruct-IQ2_M.gguf",
         | 
| 12 | 
             
            )
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 13 |  | 
| 14 |  | 
| 15 | 
             
            @app.post("/predict")
         | 
| 16 | 
             
            async def predict(text: str):
         | 
| 17 | 
             
                # Генерация ответа с помощью модели
         | 
| 18 | 
            -
                 | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
            			"role": "user",
         | 
| 22 | 
            -
            			"content": text
         | 
| 23 | 
            -
            		}
         | 
| 24 | 
            -
            	]
         | 
| 25 | 
            -
                )
         | 
| 26 | 
            -
                return {"response": result[0]["generated_text"]}
         | 
|  | |
| 1 | 
            +
            import logging
         | 
| 2 | 
             
            from fastapi import FastAPI
         | 
| 3 | 
             
            from llama_cpp import Llama
         | 
| 4 |  | 
| 5 | 
             
            app = FastAPI()

            # Prompt layout with system, user and assistant markers; the assistant
            # turn is left open so the model's completion fills it in.
            CHAT_TEMPLATE = '<|system|> {system_prompt}<|end|><|user|> {prompt}<|end|><|assistant|>'

            logging.basicConfig(
                level=logging.INFO,
                format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            )
            logger = logging.getLogger(__name__)

            logger.info("Запускаемся... 🥳🥳🥳")

            # Model initialisation.
            # An earlier variant (kept here only as a note) loaded a local file instead:
            # Llama(model_path="./models/phi-3-mini-4k-instruct-q4.gguf", verbose=False,
            #       n_gpu_layers=-1, n_ctx=4096)
            try:
                logger.info("Загрузка модели...")
                llm = Llama.from_pretrained(
                    repo_id='bartowski/Phi-3.5-mini-instruct-GGUF',
                    filename='Phi-3.5-mini-instruct-Q6_K_L.gguf',
                    n_gpu_layers=-1,
                    n_ctx=4096,
                )
            except Exception as load_error:
                # Log the failure, then re-raise so the module fails to import
                # rather than running without a model.
                logger.error(f"Ошибка загрузки модели: {load_error!s}")
                raise
         | 
| 38 | 
            +
             | 
| 39 | 
            +
             | 
| 40 | 
            +
            # составление промта для модели
         | 
| 41 | 
            +
            def create_prompt(text: str) -> str | None:
                """Build the chat prompt for the model from the user's message.

                Logs the incoming message, then fills ``CHAT_TEMPLATE`` with a fixed
                system instruction and ``text``.

                Args:
                    text: The user's message.

                Returns:
                    The formatted prompt, or ``None`` if anything raised while
                    building it (the exception is logged, not propagated).
                """
                try:
                    logger.info(f"Получено сообщение: {text}")

                    # Fixed system instruction (in Russian): asks for an accurate,
                    # brief and, where possible, humorous answer.
                    instruction = 'Ответ должен быть точным и кратким и если возможно шутливым.'

                    filled = CHAT_TEMPLATE.format(system_prompt=instruction, prompt=text)
                except Exception as err:
                    logger.error(err)
                    return None

                return filled
         | 
| 55 | 
            +
             | 
| 56 | 
            +
             | 
| 57 | 
            +
            def generate_response(prompt: str) -> str:
                """Run the model on ``prompt`` and return the generated text.

                Calls the module-level ``llm`` with a 512-token cap and ``<|end|>``
                as the stop sequence, logs the raw output, and reads the text from
                ``output['choices'][0]['text']``.

                Args:
                    prompt: Fully formatted prompt passed to the model as-is.

                Returns:
                    The generated text, or a fixed error message when the generated
                    text is empty or the model call / output parsing raises.
                """
                fallback = 'Произошла ошибка при обработке запроса'

                try:
                    output = llm(
                        prompt,
                        max_tokens=512,
                        stop=["<|end|>"],
                    )

                    logger.info('Output:')
                    logger.info(output)

                    response: str = output['choices'][0]['text']
                except Exception as e:
                    # Previously this branch only logged and fell through, so the
                    # function returned None despite its ``-> str`` annotation.
                    # Return the same fallback used for empty output, and log the
                    # traceback along with the message.
                    logger.exception(f"Ошибка обработки сообщения: {str(e)}")
                    return fallback

                # Empty text is treated as a failure, same as before.
                return response or fallback
         | 
| 78 |  | 
| 79 |  | 
| 80 | 
             
            @app.post("/predict")
            async def predict(text: str):
                """Handle POST /predict: turn ``text`` into a prompt and return the model's reply.

                Args:
                    text: The user's message.

                Returns:
                    A dict with the single key ``"response"`` holding whatever
                    ``generate_response`` returned for the prompt built by
                    ``create_prompt``.
                """
                reply = generate_response(create_prompt(text))
                return {"response": reply}
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  |