Update README.md
README.md (CHANGED)
@@ -25,47 +25,59 @@ This model was trained using [the heron library](https://github.com/turingmotors/heron).
Removed: the previous usage example, carried over from the GIT-based model card. It imported `requests` and `PIL.Image`, loaded the model with `GitLlamaForCausalLM.from_pretrained('turing-motors/heron-chat-git-ja-stablelm-base-7b-v0')`, moved it to a CUDA device selected by `device_id`, set up a processor, used a `##human:`-style prompt, passed `text` and `image` to the processor positionally, and generated with `model.generate(**inputs, max_length=256, do_sample=False, temperature=0., eos_token_id=eos_token_id_list)` before printing `processor.tokenizer.batch_decode(out)`.
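
For reference, a hedged reconstruction of the start of the removed snippet; the import path, the `device_id` value, and the device string are truncated in the diff view, so those details are assumptions rather than the card's original text:

```python
import requests
from PIL import Image

import torch
from heron.models.git_llm.git_llama import GitLlamaForCausalLM  # assumed import path

device_id = 0  # value truncated in the diff view; assumed

model = GitLlamaForCausalLM.from_pretrained('turing-motors/heron-chat-git-ja-stablelm-base-7b-v0')
model.eval()
model.to(f"cuda:{device_id}")  # device string assumed from the pattern in the new example
```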

Added, replacing it with an example for the BLIP-based model; the section now reads:

Follow [the installation guide](https://github.com/turingmotors/heron/tree/dev-0.0.1#1-clone-this-repository).

```python
import requests
import torch
from PIL import Image
from transformers import LlamaTokenizer

from heron.models.video_blip import VideoBlipForConditionalGeneration, VideoBlipProcessor

device_id = 1
device = f"cuda:{device_id}"

max_length = 512
MODEL_NAME = "turing-motors/heron-chat-blip-ja-stablelm-base-7b-v0"
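
# load the pretrained weights in half precision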
model = VideoBlipForConditionalGeneration.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16, ignore_mismatched_sizes=True
)

model = model.half()
model.eval()
model.to(device)

# prepare a processor
processor = VideoBlipProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
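# swap in the Japanese tokenizer that this model was trained with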
tokenizer = LlamaTokenizer.from_pretrained("novelai/nerdstash-tokenizer-v1", additional_special_tokens=['▁▁'])
processor.tokenizer = tokenizer
53 |
|
54 |
# prepare inputs
|
55 |
url = "https://www.barnorama.com/wp-content/uploads/2016/12/03-Confusing-Pictures.jpg"
|
56 |
image = Image.open(requests.get(url, stream=True).raw)
|
57 |
|
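
# prompt in the heron chat format; the model completes the "##gpt: " turn
# (the question means: "What is interesting about this image?")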
text = "##human: この画像の面白い点は何ですか?\n##gpt: "

# do preprocessing
inputs = processor(
    text=text,
    images=image,
    return_tensors="pt",
    truncation=True,
)
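
# move all inputs to the device; pixel_values must be cast to fp16 to match the model weights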
inputs = {k: v.to(device) for k, v in inputs.items()}
inputs["pixel_values"] = inputs["pixel_values"].to(device, torch.float16)

# set eos token
eos_token_id_list = [
    processor.tokenizer.pad_token_id,
    processor.tokenizer.eos_token_id,
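    # "##" begins the next turn marker, so treat it as a stop token too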
    int(tokenizer.convert_tokens_to_ids("##")),
]

# do inference
with torch.no_grad():
    out = model.generate(**inputs, max_length=max_length, do_sample=False, temperature=0., eos_token_id=eos_token_id_list, no_repeat_ngram_size=2)

# print result
print(processor.tokenizer.batch_decode(out))
```
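
The decoded output will typically contain the prompt followed by the model's answer and the stop marker. A minimal sketch of trimming the decoded string down to just the answer; the split markers assume the exact prompt format used above:

```python
# decode the first (only) sequence in the batch
decoded = processor.tokenizer.batch_decode(out, skip_special_tokens=True)[0]
# keep the text after "##gpt: " and drop anything from the next "##" marker on
answer = decoded.split("##gpt: ")[-1].split("##")[0].strip()
print(answer)
```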