Update README.md
Browse files
    	
        README.md
    CHANGED
    
    | @@ -18,6 +18,7 @@ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration | |
| 18 | 
             
            processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
         | 
| 19 | 
             
            model = Qwen2VLForConditionalGeneration.from_pretrained(
         | 
| 20 | 
             
                "lightonai/MonoQwen2-VL-2B-LoRA-Reranker",
         | 
|  | |
| 21 | 
             
                # attn_implementation="flash_attention_2",
         | 
| 22 | 
             
                # torch_dtype=torch.bfloat16,
         | 
| 23 | 
             
            )
         | 
| @@ -45,7 +46,7 @@ messages = [ | |
| 45 |  | 
| 46 | 
             
            # Apply chat template and tokenize
         | 
| 47 | 
             
            text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         | 
| 48 | 
            -
            inputs = processor(text=text, images=image, return_tensors="pt")
         | 
| 49 |  | 
| 50 | 
             
            # Run inference to obtain logits
         | 
| 51 | 
             
            with torch.no_grad():
         | 
|  | |
| 18 | 
             
            processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
         | 
| 19 | 
             
            model = Qwen2VLForConditionalGeneration.from_pretrained(
         | 
| 20 | 
             
                "lightonai/MonoQwen2-VL-2B-LoRA-Reranker",
         | 
| 21 | 
            +
                device_map="auto",
         | 
| 22 | 
             
                # attn_implementation="flash_attention_2",
         | 
| 23 | 
             
                # torch_dtype=torch.bfloat16,
         | 
| 24 | 
             
            )
         | 
|  | |
| 46 |  | 
| 47 | 
             
            # Apply chat template and tokenize
         | 
| 48 | 
             
            text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         | 
| 49 | 
            +
            inputs = processor(text=text, images=image, return_tensors="pt").to("cuda")
         | 
| 50 |  | 
| 51 | 
             
            # Run inference to obtain logits
         | 
| 52 | 
             
            with torch.no_grad():
         | 

