Update README.md
README.md CHANGED
@@ -55,7 +55,7 @@ To run inference of Llama 3.3 70B Instruct AWQ in INT4 precision, the AWQ model
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, AwqConfig
 
-model_id = "
+model_id = "vicky4s4s/llama-3.3-70b-instruct-AWQ-INT4"
 quantization_config = AwqConfig(
     bits=4,
     fuse_max_seq_len=512, # Note: Update this as per your use-case
@@ -102,7 +102,7 @@ import torch
 from awq import AutoAWQForCausalLM
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-model_id = "
+model_id = "vicky4s4s/llama-3.3-70b-instruct-AWQ-INT4"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoAWQForCausalLM.from_pretrained(
     model_id,
@@ -236,7 +236,6 @@ logging.getLogger('vllm').setLevel(logging.ERROR)
 logging.basicConfig(level=logging.INFO)
 
 
-pd.set_option('display.max_columns', None)
 
 class vLLMInterfaceAWQ:
     def initializeawq(self):
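For context on the first hunk: it fills in the checkpoint id used by the README's transformers + AwqConfig snippet. Below is a minimal end-to-end sketch of that loading path under stated assumptions; the do_fuse flag, torch_dtype/device_map choices, and the prompt and decoding settings are illustrative and not taken from the README.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AwqConfig

model_id = "vicky4s4s/llama-3.3-70b-instruct-AWQ-INT4"

# Fused-module config mirroring the README hunk; do_fuse=True is an assumed setting.
quantization_config = AwqConfig(
    bits=4,
    fuse_max_seq_len=512,  # Update as per your use-case
    do_fuse=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
    quantization_config=quantization_config,
)

# Illustrative generation call; prompt and decoding settings are placeholders.
prompt = "What is AWQ INT4 quantization?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

When modules are fused, generation is bounded by fuse_max_seq_len, so raise it if your prompts plus generated tokens exceed 512.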
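The last hunk only drops a stray pandas display option from the vLLM section of the README. For reference, here is a minimal sketch of loading the same checkpoint through vLLM's offline API with AWQ quantization; the README's vLLMInterfaceAWQ wrapper is not reproduced, and tensor_parallel_size, dtype, and the sampling settings below are assumptions.

from vllm import LLM, SamplingParams

model_id = "vicky4s4s/llama-3.3-70b-instruct-AWQ-INT4"

# Load the AWQ INT4 checkpoint with vLLM's AWQ kernels.
llm = LLM(
    model=model_id,
    quantization="awq",
    dtype="float16",
    tensor_parallel_size=4,  # Assumption: a 70B checkpoint usually spans multiple GPUs.
)

sampling_params = SamplingParams(temperature=0.7, max_tokens=128)
outputs = llm.generate(["What is AWQ INT4 quantization?"], sampling_params)
print(outputs[0].outputs[0].text)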