## Earlier draft: note the `None` check on the streamed delta below.
## Superseded by the working version further down; kept for reference.

# import gradio as gr
# from huggingface_hub import InferenceClient

# """
# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
# """
# client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")

# def respond(
#     message: str,
#     history: list[tuple[str, str]],  # Ignored: the bot is stateless
#     system_message: str,
#     max_tokens: int,
#     temperature: float,
#     top_p: float,
# ):
#     messages = [{"role": "system", "content": system_message}]

#     # Append only the latest user message (history is not replayed)
#     messages.append({"role": "user", "content": message})

#     response = ""

#     try:
#         # Stream the completion and yield the growing response;
#         # `msg` avoids shadowing the `message` parameter
#         for msg in client.chat_completion(
#             messages,
#             max_tokens=max_tokens,
#             stream=True,
#             temperature=temperature,
#             top_p=top_p,
#         ):
#             if msg.choices[0].delta.content is not None:
#                 token = msg.choices[0].delta.content
#                 response += token
#             yield response
#     except Exception as e:
#         yield f"An error occurred: {e}"

# if __name__ == "__main__":
#     demo.launch()  # Bug in this draft: `demo` was never defined


## Working version of the chatbot (runs smoothly)

# import gradio as gr
# from huggingface_hub import InferenceClient

# """
# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
# """
# client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")

# def respond(
#     message: str,
#     history: list[tuple[str, str]],  # This will not be used
#     system_message: str,
#     max_tokens: int,
#     temperature: float,
#     top_p: float,
# ):
#     # Build the messages list
#     messages = [{"role": "system", "content": system_message}]
#     messages.append({"role": "user", "content": message})

#     response = ""

#     try:
#         # Generate response from the model
#         for msg in client.chat_completion(
#             messages=messages,
#             max_tokens=max_tokens,
#             stream=True,
#             temperature=temperature,
#             top_p=top_p,
#         ):
#             if msg.choices[0].delta.content is not None:
#                 token = msg.choices[0].delta.content
#                 response += token
#             yield response
#     except Exception as e:
#         yield f"An error occurred: {e}"

# """
# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
# """
# demo = gr.ChatInterface(
#     respond,
#     additional_inputs=[
#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#         gr.Slider(
#             minimum=0.1,
#             maximum=1.0,
#             value=0.95,
#             step=0.05,
#             label="Top-p (nucleus sampling)",
#         ),
#     ],
# )

# if __name__ == "__main__":
#     demo.launch()


# Use a pipeline as a high-level helper
from transformers import pipeline

messages = [
    {"role": "user", "content": "Who are you?"},
]
# Note: Meta-Llama-3.1-8B-Instruct is a gated model on the Hugging Face Hub;
# you need granted access and a valid login token for the download to succeed.
pipe = pipeline("text-generation", model="meta-llama/Meta-Llama-3.1-8B-Instruct")
result = pipe(messages)
print(result)
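# A sketch of pulling just the assistant reply out of the pipeline output.
# With chat-style input, recent transformers versions return the whole
# conversation under "generated_text" as a list of role/content dicts; the
# exact output shape can vary by version, so verify before relying on it.
reply = result[0]["generated_text"][-1]["content"]
print(reply)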

# # Load model directly
# from transformers import AutoTokenizer, AutoModelForCausalLM

# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
# model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
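
# A minimal generation sketch for the direct-load path above, kept commented
# out to match it. Assumes access to the gated checkpoint and enough memory
# for the 8B model; `apply_chat_template` and `generate` are standard
# transformers APIs, and max_new_tokens=256 is an arbitrary choice.
# inputs = tokenizer.apply_chat_template(
#     messages, add_generation_prompt=True, return_tensors="pt"
# )
# outputs = model.generate(inputs, max_new_tokens=256)
# # Decode only the newly generated tokens, skipping the prompt
# print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))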