pankajmathur committed on
Commit 2cb82d5 · verified · 1 Parent(s): 5b6c4ba

Upload Orca_Mini_Chat_8bit_Phi_4.ipynb

Files changed (1)
  1. Orca_Mini_Chat_8bit_Phi_4.ipynb +203 -0
Orca_Mini_Chat_8bit_Phi_4.ipynb ADDED
@@ -0,0 +1,203 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "TFu_ibC1eYrz"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install torch transformers bitsandbytes accelerate -q"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Zs7QNs0Tet6r"
+ },
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from transformers import pipeline, BitsAndBytesConfig\n",
+ "from IPython.display import clear_output\n",
+ "from google.colab import output"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "lOhAjLdI2oFt"
+ },
+ "outputs": [],
+ "source": [
+ "quantization_config = BitsAndBytesConfig(\n",
+ " load_in_8bit=True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "v4uIN6uIeyl3"
+ },
+ "outputs": [],
+ "source": [
+ "class ChatBot:\n",
+ " _instance = None\n",
+ " _current_model = None\n",
+ "\n",
+ " def __init__(self, model_slug=None):\n",
+ " if model_slug and model_slug != ChatBot._current_model:\n",
+ " self.load_model(model_slug)\n",
+ " ChatBot._current_model = model_slug\n",
+ "\n",
+ " self.messages = []\n",
+ " self.max_tokens = 2048\n",
+ " self.temperature = 0.5\n",
+ " self.top_k = 100\n",
+ " self.top_p = 0.95\n",
+ "\n",
+ " @classmethod\n",
+ " def get_instance(cls, model_slug=None):\n",
+ " if not cls._instance or (model_slug and model_slug != cls._current_model):\n",
+ " cls._instance = cls(model_slug)\n",
+ " return cls._instance\n",
+ "\n",
+ " def load_model(self, model_slug):\n",
+ " print(f\"Loading model {model_slug}...\")\n",
+ " self.pipeline = pipeline(\n",
+ " \"text-generation\",\n",
+ " model=model_slug,\n",
+ " model_kwargs={\"quantization_config\": quantization_config},\n",
+ " device_map=\"auto\",\n",
+ " )\n",
+ " clear_output()\n",
+ " print(\"Model loaded successfully!\")\n",
+ "\n",
+ " def reset_conversation(self, system_message):\n",
+ " \"\"\"Reset the conversation with a new system message\"\"\"\n",
+ " self.messages = [{\"role\": \"system\", \"content\": system_message}]\n",
+ "\n",
+ " def get_response(self, user_input):\n",
+ " \"\"\"Get response with current parameters\"\"\"\n",
+ " self.messages.append({\"role\": \"user\", \"content\": user_input})\n",
+ " outputs = self.pipeline(\n",
+ " self.messages,\n",
+ " max_new_tokens=self.max_tokens,\n",
+ " do_sample=True,\n",
+ " temperature=self.temperature,\n",
+ " top_k=self.top_k,\n",
+ " top_p=self.top_p\n",
+ " )\n",
+ " response = outputs[0][\"generated_text\"][-1]\n",
+ " content = response.get('content', 'No content available')\n",
+ " self.messages.append({\"role\": \"assistant\", \"content\": content})\n",
+ " return content\n",
+ "\n",
+ " def update_params(self, max_tokens=None, temperature=None, top_k=None, top_p=None):\n",
+ " \"\"\"Update generation parameters\"\"\"\n",
+ " if max_tokens is not None:\n",
+ " self.max_tokens = max_tokens\n",
+ " if temperature is not None:\n",
+ " self.temperature = temperature\n",
+ " if top_k is not None:\n",
+ " self.top_k = top_k\n",
+ " if top_p is not None:\n",
+ " self.top_p = top_p"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "H2n_6Xcue3Vn"
+ },
+ "outputs": [],
+ "source": [
+ "def run_chatbot(\n",
+ " model=None,\n",
+ " system_message=\"You are Orca Mini, an expert at following given instructions. Think step by step before coming up with a final answer.\",\n",
+ " max_tokens=None,\n",
125
+ " temperature=None,\n",
126
+ " top_k=None,\n",
127
+ " top_p=None,\n",
128
+ "):\n",
129
+ " try:\n",
130
+ " # Get or create chatbot instance\n",
131
+ " chatbot = ChatBot.get_instance(model)\n",
132
+ "\n",
133
+ " # Update parameters if provided\n",
134
+ " chatbot.update_params(max_tokens, temperature, top_k, top_p)\n",
135
+ "\n",
136
+ " # Reset conversation with new system message\n",
137
+ " chatbot.reset_conversation(system_message)\n",
138
+ "\n",
139
+ " print(\"Chatbot: Hi! Type 'quit' to exit.\")\n",
140
+ "\n",
141
+ " while True:\n",
142
+ " user_input = input(\"You: \").strip()\n",
143
+ " if user_input.lower() == 'quit':\n",
144
+ " break\n",
145
+ " try:\n",
146
+ " response = chatbot.get_response(user_input)\n",
147
+ " print(\"Chatbot:\", response)\n",
148
+ " except Exception as e:\n",
149
+ " print(f\"Chatbot: An error occurred: {str(e)}\")\n",
150
+ " print(\"Please try again.\")\n",
151
+ "\n",
152
+ " except Exception as e:\n",
153
+ " print(f\"Error in chatbot: {str(e)}\")"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": null,
159
+ "metadata": {
160
+ "colab": {
161
+ "background_save": true
162
+ },
163
+ "id": "JEqgoAH2fC6h"
164
+ },
165
+ "outputs": [],
166
+ "source": [
+ "run_chatbot(model=\"pankajmathur/orca_mini_phi-4\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "tGW8wsfAfHDf"
+ },
+ "outputs": [],
+ "source": [
+ "# # change system message\n",
+ "# run_chatbot(\n",
+ "# system_message=\"You are Orca Mini, You are expert in logic, Think step by step before coming up with final answer\",\n",
+ "# max_tokens=1024,\n",
+ "# temperature=0.3\n",
+ "# )"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+ }