Update README.md
README.md CHANGED
@@ -55,7 +55,7 @@ To run inference of Llama 3.3 70B Instruct AWQ in INT4 precision, the AWQ model
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, AwqConfig
 
-model_id = "
+model_id = "vicky4s4s/llama-3.3-70b-instruct-AWQ-INT4"
 quantization_config = AwqConfig(
     bits=4,
     fuse_max_seq_len=512, # Note: Update this as per your use-case
@@ -102,7 +102,7 @@ import torch
 from awq import AutoAWQForCausalLM
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-model_id = "
+model_id = "vicky4s4s/llama-3.3-70b-instruct-AWQ-INT4"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoAWQForCausalLM.from_pretrained(
     model_id,
@@ -236,7 +236,6 @@ logging.getLogger('vllm').setLevel(logging.ERROR)
 logging.basicConfig(level=logging.INFO)
 
 
-pd.set_option('display.max_columns', None)
 
 class vLLMInterfaceAWQ:
     def initializeawq(self):
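For context on the first hunk: it fills in the checkpoint id used by the README's transformers + AwqConfig snippet. Below is a minimal end-to-end sketch of that loading path under stated assumptions; the do_fuse flag, torch_dtype/device_map choices, and the prompt and decoding settings are illustrative and not taken from the README.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AwqConfig

model_id = "vicky4s4s/llama-3.3-70b-instruct-AWQ-INT4"

# Fused-module config mirroring the README hunk; do_fuse=True is an assumed setting.
quantization_config = AwqConfig(
    bits=4,
    fuse_max_seq_len=512,  # Update as per your use-case
    do_fuse=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
    quantization_config=quantization_config,
)

# Illustrative generation call; prompt and decoding settings are placeholders.
prompt = "What is AWQ INT4 quantization?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

When modules are fused, generation is bounded by fuse_max_seq_len, so raise it if your prompts plus generated tokens exceed 512.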
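The last hunk only drops a stray pandas display option from the vLLM section of the README. For reference, here is a minimal sketch of loading the same checkpoint through vLLM's offline API with AWQ quantization; the README's vLLMInterfaceAWQ wrapper is not reproduced, and tensor_parallel_size, dtype, and the sampling settings below are assumptions.

from vllm import LLM, SamplingParams

model_id = "vicky4s4s/llama-3.3-70b-instruct-AWQ-INT4"

# Load the AWQ INT4 checkpoint with vLLM's AWQ kernels.
llm = LLM(
    model=model_id,
    quantization="awq",
    dtype="float16",
    tensor_parallel_size=4,  # Assumption: a 70B checkpoint usually spans multiple GPUs.
)

sampling_params = SamplingParams(temperature=0.7, max_tokens=128)
outputs = llm.generate(["What is AWQ INT4 quantization?"], sampling_params)
print(outputs[0].outputs[0].text)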