tanahhh committed
Commit 8babac7 · 1 Parent(s): 86b3344

Update README.md

Files changed (1):
  1. README.md +27 -15
README.md CHANGED
@@ -25,47 +25,59 @@ This model was trained using [the heron library](https://github.com/turingmotors/heron).
Before:

Follow [the installation guide](https://github.com/turingmotors/heron/tree/dev-0.0.1#1-clone-this-repository).

```python
import requests
from PIL import Image

import torch
from transformers import AutoProcessor
from heron.models.git_llm.git_llama import GitLlamaForCausalLM

device_id = 0

# prepare a pretrained model
model = GitLlamaForCausalLM.from_pretrained('turing-motors/heron-chat-git-ja-stablelm-base-7b-v0')
model.eval()
model.to(f"cuda:{device_id}")

# prepare a processor
processor = AutoProcessor.from_pretrained('turing-motors/heron-chat-git-ja-stablelm-base-7b-v0', additional_special_tokens=["▁▁"])

# prepare inputs
url = "https://www.barnorama.com/wp-content/uploads/2016/12/03-Confusing-Pictures.jpg"
image = Image.open(requests.get(url, stream=True).raw)

text = "##Instruction: Please answer the following question concretely. ##Question: What is unusual about this image? Explain precisely and concretely what he is doing. ##Answer: "

# do preprocessing
inputs = processor(
    text,
    image,
    return_tensors="pt",
    truncation=True,
)
inputs = {k: v.to(f"cuda:{device_id}") for k, v in inputs.items()}

# set eos tokens
eos_token_id_list = [
    processor.tokenizer.pad_token_id,
    processor.tokenizer.eos_token_id,
]

# do inference
with torch.no_grad():
    out = model.generate(**inputs, max_length=256, do_sample=False, temperature=0., eos_token_id=eos_token_id_list)

# print result
print(processor.tokenizer.batch_decode(out))
```
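For reuse with other questions, the instruction-style prompt above can be assembled from parts. A minimal sketch (the `build_prompt` helper is ours, not part of the README), assuming the same `##Instruction / ##Question / ##Answer` format:

```python
def build_prompt(question: str) -> str:
    # Mirrors the prompt format used in the snippet above.
    instruction = "Please answer the following question concretely."
    return f"##Instruction: {instruction} ##Question: {question} ##Answer: "

text = build_prompt("What is unusual about this image?")
```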
 
After:

Follow [the installation guide](https://github.com/turingmotors/heron/tree/dev-0.0.1#1-clone-this-repository).

```python
import requests

import torch
from PIL import Image
from heron.models.video_blip import VideoBlipForConditionalGeneration, VideoBlipProcessor
from transformers import LlamaTokenizer

device_id = 1
device = f"cuda:{device_id}"

max_length = 512
MODEL_NAME = "turing-motors/heron-chat-blip-ja-stablelm-base-7b-v0"

# prepare a pretrained model (weights loaded in float16)
model = VideoBlipForConditionalGeneration.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16, ignore_mismatched_sizes=True
)

model = model.half()
model.eval()
model.to(device)

# prepare a processor
processor = VideoBlipProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
tokenizer = LlamaTokenizer.from_pretrained("novelai/nerdstash-tokenizer-v1", additional_special_tokens=['▁▁'])
processor.tokenizer = tokenizer

# prepare inputs
url = "https://www.barnorama.com/wp-content/uploads/2016/12/03-Confusing-Pictures.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Japanese prompt: "What is interesting about this image?"
text = "##human: この画像の面白い点は何ですか?\n##human: "

# do preprocessing
inputs = processor(
    text=text,
    images=image,
    return_tensors="pt",
    truncation=True,
)

inputs = {k: v.to(device) for k, v in inputs.items()}
inputs["pixel_values"] = inputs["pixel_values"].to(device, torch.float16)

# set eos tokens: stop on pad, eos, or the "##" turn marker
eos_token_id_list = [
    processor.tokenizer.pad_token_id,
    processor.tokenizer.eos_token_id,
    int(tokenizer.convert_tokens_to_ids("##")),
]

# do inference
with torch.no_grad():
    out = model.generate(**inputs, max_length=256, do_sample=False, temperature=0., eos_token_id=eos_token_id_list, no_repeat_ngram_size=2)

# print result
print(processor.tokenizer.batch_decode(out))
```
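`batch_decode` returns a list of strings, one per generated sequence. A minimal follow-on sketch (not part of the commit) that prints the first sequence with special tokens stripped, assuming `out` and `processor` from the block above:

```python
# Decode the single generated sequence, dropping pad/eos markers.
answer = processor.tokenizer.batch_decode(out, skip_special_tokens=True)[0]
print(answer)
```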