Flame-Code-VLM sundevil45 committed on
Commit
38f338d
·
verified ·
1 Parent(s): a74f72a

Update README.md (#2)

Browse files

- Update README.md (3b64b6c0d4b4d68042ad5eb9911c82a043033d22)


Co-authored-by: ys <[email protected]>

Files changed (1) hide show
  1. README.md +65 -66
README.md CHANGED
@@ -1,67 +1,66 @@
1
- ---
2
- license: apache-2.0
3
- ---
4
- We instruction-tuned the lmms-lab/llava-onevision-qwen2-7b-ov model on the Flame-Code-VLM/Flame-Waterfall-React dataset.
5
- This model is released to showcase the value of the synthesized dataset. However, it is not intended for general-purpose tasks. Please use it with caution.
6
-
7
- ### Generation
8
-
9
- The following is the sample code for inference.
10
-
11
- ```python
12
-
13
- # pip install git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
14
- # export PYTHONPATH="/your_path_to_LLaVA-NeXT_repo:$PYTHONPATH"
15
-
16
- from llava.model.builder import load_pretrained_model
17
- from llava.mm_utils import process_images, tokenizer_image_token
18
- from llava.constants import DEFAULT_IMAGE_TOKEN
19
-
20
- from PIL import Image
21
- import torch
22
- import warnings
23
-
24
- warnings.filterwarnings("ignore")
25
-
26
- pretrained = "Flame-Code-VLM/llava-qwen2-7b-ov-flamewaterfall"
27
-
28
- model_name = "llava_qwen"
29
- device = "cuda"
30
- device_map = "auto"
31
- llava_model_args = {
32
- "multimodal": True,
33
- "attn_implementation": None,
34
- }
35
- tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map,**llava_model_args)
36
- model.config.tokenizer_padding_side = 'left' # Use left padding for batch processing
37
- model.eval()
38
-
39
- url = "path_to_your_screenshot_image_file"
40
- image = Image.open(url)
41
- image_tensor = process_images([image], image_processor, model.config)
42
- image_tensor = [_image.to(dtype=torch.float16, device=device) for _image in image_tensor]
43
-
44
- prompt = "Below is an image of the page to create. Generate React code and styles to replicate the design, including layout, typography, and styling. Format your response as follows:'// CSS\n[CSS/SCSS code]\n\n// [React Implementation (JS/TS/JSX/TSX)]\n[Component code]'.\n\n ### Input Image:\n{image}\n\n### Response:\n"
45
-
46
- input_ids = tokenizer_image_token(prompt, tokenizer, return_tensors='pt')
47
- input_ids = input_ids.unsqueeze(0)
48
- input_ids=input_ids.to(device)
49
- image_sizes = [image.size]
50
- modalities = ["image"]
51
-
52
- cont = model.generate(
53
- input_ids,
54
- images=image_tensor,
55
- image_sizes=image_sizes,
56
- modalities=modalities, # Added this line with the modalities
57
- do_sample=True,
58
- num_beams=5,
59
- temperature=0.1,
60
- max_new_tokens=4096,
61
- top_p=0.95,
62
- repetition_penalty=1.05
63
- )
64
-
65
- text_outputs = tokenizer.batch_decode(cont, skip_special_tokens=True)
66
-
67
  ```
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ We instruction-tuned the lmms-lab/llava-onevision-qwen2-7b-ov model on the Flame-Code-VLM/Flame-Waterfall-React dataset.
5
+ This model is released to showcase the value of the synthesized dataset. However, it is not intended for general-purpose tasks. Please use it with caution.
6
+
7
+ ### Generation
8
+
9
+ The following is the sample code for inference.
10
+
11
+ ```python
12
+
13
+ # pip install git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
14
+ # export PYTHONPATH="/your_path_to_LLaVA-NeXT_repo:$PYTHONPATH"
15
+
16
+ from llava.model.builder import load_pretrained_model
17
+ from llava.mm_utils import process_images, tokenizer_image_token
18
+ from llava.constants import DEFAULT_IMAGE_TOKEN
19
+
20
+ from PIL import Image
21
+ import torch
22
+ import warnings
23
+
24
+ warnings.filterwarnings("ignore")
25
+
26
+ pretrained = "Flame-Code-VLM/llava-qwen2-7b-ov-flamewaterfall"
27
+
28
+ model_name = "llava_qwen"
29
+ device = "cuda"
30
+ device_map = "auto"
31
+ llava_model_args = {
32
+ "multimodal": True,
33
+ "attn_implementation": None,
34
+ }
35
+ tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map,**llava_model_args)
36
+ model.config.tokenizer_padding_side = 'left' # Use left padding for batch processing
37
+ model.eval()
38
+
39
+ url = "path_to_your_screenshot_image_file"
40
+ image = Image.open(url)
41
+ image_tensor = process_images([image], image_processor, model.config)
42
+ image_tensor = [_image.to(dtype=torch.float16, device=device) for _image in image_tensor]
43
+
44
+ prompt = "Below is an image of the page to create. Generate React code and styles to replicate the design, including layout, typography, and styling. Format your response as follows:'// CSS\n[CSS/SCSS code]\n\n// [React Implementation (JS/TS/JSX/TSX)]\n[Component code]'.\n\n ### Input Image:\n{image}\n\n### Response:\n"
45
+
46
+ input_ids = tokenizer_image_token(prompt, tokenizer, return_tensors='pt')
47
+ input_ids = input_ids.unsqueeze(0)
48
+ input_ids=input_ids.to(device)
49
+ image_sizes = [image.size]
50
+ modalities = ["image"]
51
+
52
+ cont = model.generate(
53
+ input_ids,
54
+ images=image_tensor,
55
+ image_sizes=image_sizes,
56
+ modalities=modalities, # Added this line with the modalities
57
+ do_sample=True,
58
+ temperature=0.1,
59
+ max_new_tokens=4096,
60
+ top_p=0.95,
61
+ repetition_penalty=1.05
62
+ )
63
+
64
+ text_outputs = tokenizer.batch_decode(cont, skip_special_tokens=True)
65
+
 
66
  ```