czczup committed
Commit 75bf5ed · verified · 1 Parent(s): 712b352

Upload folder using huggingface_hub

Files changed (1): README.md (+16 / -16)
README.md CHANGED
@@ -78,7 +78,7 @@ Here, we have conducted only a simple performance comparison. For more detailed
 
 We provide an example code to run InternVL-Chat-V1-1 using `transformers`.
 
-We also welcome you to experience the InternVL2 series models in our [online demo](https://internvl.opengvlab.com/). Currently, due to the limited GPU resources with public IP addresses, we can only deploy models up to a maximum of 26B. We will expand soon and deploy larger models to the online demo.
+We also welcome you to experience the InternVL2 series models in our [online demo](https://internvl.opengvlab.com/).
 
 > Please use transformers==4.37.2 to ensure the model works normally.
 
@@ -172,9 +172,9 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True).eval().cuda()
-tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
-generation_config = dict(num_beams=1, max_new_tokens=1024, do_sample=False)
+generation_config = dict(max_new_tokens=1024, do_sample=False)
 question = 'Hello, who are you?'
 response, history = model.chat(tokenizer, None, question, generation_config, history=None, return_history=True)
 print(f'User: {question}')
@@ -199,13 +199,13 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True).eval().cuda()
-tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
 image_processor = CLIPImageProcessor.from_pretrained(path)
 image = Image.open('./examples/image2.jpg').resize((448, 448))
 pixel_values = image_processor(images=image, return_tensors='pt').pixel_values.to(torch.bfloat16).cuda()
 
-generation_config = dict(num_beams=1, max_new_tokens=1024, do_sample=False)
+generation_config = dict(max_new_tokens=1024, do_sample=False)
 question = '<image>\nPlease describe the image shortly.'
 response = model.chat(tokenizer, pixel_values, question, generation_config)
 print(f'User: {question}')
@@ -225,13 +225,13 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True).eval().cuda()
-tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
 image_processor = CLIPImageProcessor.from_pretrained(path)
 image = Image.open('./examples/image2.jpg').resize((448, 448))
 pixel_values = image_processor(images=image, return_tensors='pt').pixel_values.to(torch.bfloat16).cuda()
 
-generation_config = dict(num_beams=1, max_new_tokens=1024, do_sample=False)
+generation_config = dict(max_new_tokens=1024, do_sample=False)
 question = '<image>\nPlease describe the image in detail.'
 response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
 print(f'User: {question}')
@@ -258,7 +258,7 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True).eval().cuda()
-tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
 image_processor = CLIPImageProcessor.from_pretrained(path)
 image1 = Image.open('./examples/image1.jpg').resize((448, 448))
@@ -267,7 +267,7 @@ image2 = Image.open('./examples/image2.jpg').resize((448, 448))
 pixel_values2 = image_processor(images=image2, return_tensors='pt').pixel_values.to(torch.bfloat16).cuda()
 pixel_values = torch.cat((pixel_values1, pixel_values2), dim=0)
 
-generation_config = dict(num_beams=1, max_new_tokens=1024, do_sample=False)
+generation_config = dict(max_new_tokens=1024, do_sample=False)
 question = '<image>\nDescribe the two images in detail.'
 response, history = model.chat(tokenizer, pixel_values, question, generation_config,
                                history=None, return_history=True)
@@ -296,7 +296,7 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True).eval().cuda()
-tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
 image_processor = CLIPImageProcessor.from_pretrained(path)
 image1 = Image.open('./examples/image1.jpg').resize((448, 448))
@@ -306,7 +306,7 @@ pixel_values2 = image_processor(images=image2, return_tensors='pt').pixel_values
 pixel_values = torch.cat((pixel_values1, pixel_values2), dim=0)
 num_patches_list = [pixel_values1.size(0), pixel_values2.size(0)]
 
-generation_config = dict(num_beams=1, max_new_tokens=1024, do_sample=False)
+generation_config = dict(max_new_tokens=1024, do_sample=False)
 question = 'Image-1: <image>\nImage-2: <image>\nDescribe the two images in detail.'
 response, history = model.chat(tokenizer, pixel_values, question, generation_config,
                                num_patches_list=num_patches_list, history=None, return_history=True)
@@ -333,7 +333,7 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True).eval().cuda()
-tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
 image_processor = CLIPImageProcessor.from_pretrained(path)
 image1 = Image.open('./examples/image1.jpg').resize((448, 448))
@@ -343,7 +343,7 @@ pixel_values2 = image_processor(images=image2, return_tensors='pt').pixel_values
 pixel_values = torch.cat((pixel_values1, pixel_values2), dim=0)
 num_patches_list = [pixel_values1.size(0), pixel_values2.size(0)]
 
-generation_config = dict(num_beams=1, max_new_tokens=1024, do_sample=False)
+generation_config = dict(max_new_tokens=1024, do_sample=False)
 questions = ['<image>\nDescribe the image in detail.'] * len(num_patches_list)
 responses = model.batch_chat(tokenizer, pixel_values,
                              num_patches_list=num_patches_list,
@@ -403,9 +403,9 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True).eval().cuda()
-tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
-generation_config = dict(num_beams=1, max_new_tokens=1024, do_sample=False)
+generation_config = dict(max_new_tokens=1024, do_sample=False)
 
 video_path = './examples/red-panda.mp4'
 pixel_values, num_patches_list = load_video(video_path, num_segments=8)
@@ -436,7 +436,7 @@ from threading import Thread
 # Initialize the streamer
 streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=10)
 # Define the generation configuration
-generation_config = dict(num_beams=1, max_new_tokens=1024, do_sample=False, streamer=streamer)
+generation_config = dict(max_new_tokens=1024, do_sample=False, streamer=streamer)
 # Start the model chat in a separate thread
 thread = Thread(target=model.chat, kwargs=dict(
     tokenizer=tokenizer, pixel_values=pixel_values, question=question,
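
For quick reference, the single-image example assembled as it reads on the new side of this diff is sketched below. The imports, the model ID in `path`, and the final `print` of the response are assumptions filled in from the surrounding README context; they are not part of the hunks shown here.

```python
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer, CLIPImageProcessor

# Assumed model ID; the README defines `path` outside the hunks above.
path = 'OpenGVLab/InternVL-Chat-V1-1'
model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

# Preprocess a single 448x448 image into bfloat16 pixel values on the GPU.
image_processor = CLIPImageProcessor.from_pretrained(path)
image = Image.open('./examples/image2.jpg').resize((448, 448))
pixel_values = image_processor(images=image, return_tensors='pt').pixel_values.to(torch.bfloat16).cuda()

generation_config = dict(max_new_tokens=1024, do_sample=False)
question = '<image>\nPlease describe the image shortly.'
response = model.chat(tokenizer, pixel_values, question, generation_config)
print(f'User: {question}')
print(f'Assistant: {response}')  # assumed; the hunk shows only the User print
```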
 
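The streaming hunk stops before the generated text is consumed. A minimal sketch of the consuming side, assuming the `model`, `tokenizer`, `pixel_values`, `question`, and `streamer` objects from the hunks above; the iteration loop is an assumption, not part of this commit:

```python
from threading import Thread

# generation_config carries the streamer, as on the new side of this diff.
generation_config = dict(max_new_tokens=1024, do_sample=False, streamer=streamer)

# Run model.chat in a background thread so the main thread can read the stream.
thread = Thread(target=model.chat, kwargs=dict(
    tokenizer=tokenizer, pixel_values=pixel_values, question=question,
    generation_config=generation_config, history=None))
thread.start()

# TextIteratorStreamer is iterable; print text chunks as they arrive.
generated_text = ''
for new_text in streamer:
    generated_text += new_text
    print(new_text, end='', flush=True)
thread.join()
```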