Spaces status: Runtime error
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
@@ -1,13 +1,22 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
+import bitsandbytes as bnb
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
-# Define the 
+# Define the model name
 model_name = "CreitinGameplays/ConvAI-9b"
 
+# Quantization configuration with bitsandbytes settings
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
 # Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, low_cpu_mem_usage=True)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
