Spaces:

zeeshan391
/

fast_api_deploy

Sleeping

zeeshan391 committed on Sep 12, 2024

Commit

128258c

verified ·

1 Parent(s): 4592519

updated

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from langchain_community.llms import LlamaCpp
 from huggingface_hub.file_download import http_get
-# from llama_cpp import Llama
 from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
 from langchain_core.prompts import ChatPromptTemplate
@@ -45,15 +45,25 @@ def load_model(
     #     n_ctx=1024
     # )
-    model = LlamaCpp(
         model_path=final_model_path,
         temperature=0.3,
         max_tokens=2000,
-        top_p=1,
         n_ctx=1024,
-        callback_manager=callback_manager,
-        verbose=True,
     )
     print("Model loaded!")
     return model

 from pydantic import BaseModel
 from langchain_community.llms import LlamaCpp
 from huggingface_hub.file_download import http_get
+from llama_cpp import Llama
 from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
 from langchain_core.prompts import ChatPromptTemplate
     #     n_ctx=1024
     # )
+    # model = LlamaCpp(
+    #     model_path=final_model_path,
+    #     temperature=0.3,
+    #     max_tokens=2000,
+    #     top_p=1,
+    #     n_ctx=1024,
+    #     callback_manager=callback_manager,
+    #     verbose=True,
+    # )
+    model = Llama(
         model_path=final_model_path,
         temperature=0.3,
         max_tokens=2000,
         n_ctx=1024,
+        # n_threads=8,
+        echo=False
     )
     print("Model loaded!")
     return model