BenK0y committed · verified
Commit f8fe675 · 1 Parent(s): 76fec6b

Update app.py

Files changed (1)
  1. app.py +42 -37
app.py CHANGED
@@ -1,42 +1,47 @@
- # from transformers import AutoModel, AutoTokenizer
-
- # tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
- # model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
- # model = model.eval().cuda()
-
-
- # input your test image
- # image_file = 'car.jpg'
-
- # plain texts OCR
- # res = model.chat(tokenizer, image_file, ocr_type='ocr')
-
- # format texts OCR:
- # res = model.chat(tokenizer, image_file, ocr_type='format')
-
- # fine-grained OCR:
- # res = model.chat(tokenizer, image_file, ocr_type='ocr', ocr_box='')
- # res = model.chat(tokenizer, image_file, ocr_type='format', ocr_box='')
- # res = model.chat(tokenizer, image_file, ocr_type='ocr', ocr_color='')
- # res = model.chat(tokenizer, image_file, ocr_type='format', ocr_color='')
-
- # multi-crop OCR:
- # res = model.chat_crop(tokenizer, image_file, ocr_type='ocr')
- # res = model.chat_crop(tokenizer, image_file, ocr_type='format')
-
- # render the formatted OCR results:
- # res = model.chat(tokenizer, image_file, ocr_type='format', render=True, save_render_file = './demo.html')
-
- # print(res)
-
- import google.generativeai as genai
- import os
-
- genai.configure(api_key=os.environ["AIzaSyB5WiEJf_yLMD1dMQf305EAbaPTzF_QD-I"])
-
- model = genai.GenerativeModel('gemini-1.5-flash')
- response = model.generate_content(
-     text_input="the color of the car is ?",
-     image_input="car.jpg"
- )
- print(response)
+ import torch
+ from PIL import Image
+ from transformers import AutoModel, AutoTokenizer
+
+ model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
+     attn_implementation='sdpa', torch_dtype=torch.bfloat16)  # sdpa or flash_attention_2, no eager
+ model = model.eval().cuda()
+ tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
+
+ image = Image.open('car.jpg').convert('RGB')
+ question = 'What is in the image?'
+ msgs = [{'role': 'user', 'content': [image, question]}]
+
+ res = model.chat(
+     image=None,
+     msgs=msgs,
+     tokenizer=tokenizer
+ )
+ print(res)
+
+ ## if you want to use streaming, please make sure sampling=True and stream=True
+ ## the model.chat will return a generator
+ res = model.chat(
+     image=None,
+     msgs=msgs,
+     tokenizer=tokenizer,
+     sampling=True,
+     stream=True
+ )
+
+ generated_text = ""
+ for new_text in res:
+     generated_text += new_text
+     print(new_text, flush=True, end='')
+
+ #import google.generativeai as genai
+ #import os
+
+ #genai.configure(api_key=os.environ["AIzaSyB5WiEJf_yLMD1dMQf305EAbaPTzF_QD-I"])
+
+ #model = genai.GenerativeModel('gemini-1.5-flash')
+ #response = model.generate_content(
+ #    text_input="the color of the car is ?",
+ #    image_input="car.jpg"
+ #)
+ #print(response)
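The comment on the model-loading line above names the two attention backends this checkpoint supports. A minimal sketch of the flash_attention_2 variant, assuming the flash-attn package is installed; this is an illustration, not part of the commit:

import torch
from transformers import AutoModel

# Same checkpoint as in the diff, swapping the attention backend from
# 'sdpa' to 'flash_attention_2'; per the comment above, 'eager' is not supported.
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-V-2_6',
    trust_remote_code=True,
    attn_implementation='flash_attention_2',
    torch_dtype=torch.bfloat16,
).eval().cuda()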
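The commented-out Gemini snippet at the tail would not run even if uncommented: it passes what appears to be the key value itself as the environment-variable name (and so leaks the key into the source), and generate_content takes a list of content parts rather than text_input=/image_input= keyword arguments. A minimal sketch of the conventional google-generativeai call, assuming the key is stored under a GOOGLE_API_KEY environment variable (that variable name is our assumption):

import os

import google.generativeai as genai
from PIL import Image

# Look up the key by environment-variable *name*; the key value itself
# should never appear in source. GOOGLE_API_KEY is an assumed name.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

model = genai.GenerativeModel('gemini-1.5-flash')
# generate_content accepts a list of parts (strings and PIL images),
# not text_input=/image_input= keyword arguments.
response = model.generate_content([
    "the color of the car is ?",
    Image.open('car.jpg'),
])
print(response.text)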