Add Inference Client
Browse files
app.py
CHANGED
@@ -7,15 +7,16 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
|
|
7 |
from langchain_community.llms import HuggingFaceEndpoint
|
8 |
from langchain.prompts import PromptTemplate, ChatPromptTemplate
|
9 |
from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate
|
|
|
10 |
import os
|
11 |
import gradio as gr
|
12 |
import spaces
|
13 |
|
14 |
-
|
15 |
token = os.environ["API_TOKEN"]
|
16 |
|
17 |
llm = HuggingFaceEndpoint(
|
18 |
-
endpoint_url=f"{
|
19 |
huggingfacehub_api_token = f"{token}",
|
20 |
task = "text-generation",
|
21 |
max_new_tokens=128,
|
@@ -27,6 +28,8 @@ llm = HuggingFaceEndpoint(
|
|
27 |
)
|
28 |
#print(llm)
|
29 |
|
|
|
|
|
30 |
def chat_template_prompt():
|
31 |
template = """
|
32 |
Do not repeat questions and do not generate answer for user/human.Do not repeat yourself and do not create/generate dialogues.
|
|
|
7 |
from langchain_community.llms import HuggingFaceEndpoint
|
8 |
from langchain.prompts import PromptTemplate, ChatPromptTemplate
|
9 |
from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate
|
10 |
+
from huggingface_hub import InferenceClient
|
11 |
import os
|
12 |
import gradio as gr
|
13 |
import spaces
|
14 |
|
15 |
+
endpoint_url = "https://kp4xdy196cw81uf3.us-east-1.aws.endpoints.huggingface.cloud"
|
16 |
token = os.environ["API_TOKEN"]
|
17 |
|
18 |
llm = HuggingFaceEndpoint(
|
19 |
+
endpoint_url=f"{endpoint_url}",
|
20 |
huggingfacehub_api_token = f"{token}",
|
21 |
task = "text-generation",
|
22 |
max_new_tokens=128,
|
|
|
28 |
)
|
29 |
#print(llm)
|
30 |
|
31 |
+
client = InferenceClient(endpoint_url, token=token)
|
32 |
+
|
33 |
def chat_template_prompt():
|
34 |
template = """
|
35 |
Do not repeat questions and do not generate answer for user/human.Do not repeat yourself and do not create/generate dialogues.
|