Image-Text-to-Text
Transformers
ONNX
Safetensors
English
idefics3
conversational
asnassar committed on
Commit
c15d2cc
·
verified ·
1 Parent(s): 63a8850

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +22 -88
README.md CHANGED
@@ -94,101 +94,35 @@ generated_texts = processor.batch_decode(
94
  print(generated_texts[0])
95
  ```
96
 
97
- #### Docling:
98
 
99
 
100
- ```python
101
- import json
102
- import time
103
- from pathlib import Path
104
-
105
- import yaml
106
-
107
- from docling.datamodel.base_models import InputFormat
108
- from docling.datamodel.pipeline_options import SmolDoclingOptions, VlmPipelineOptions
109
- from docling.document_converter import DocumentConverter, PdfFormatOption
110
- from docling.pipeline.vlm_pipeline import VlmPipeline
111
-
112
- sources = [
113
- # "https://arxiv.org/pdf/2408.09869",
114
- "tests/data/2305.03393v1-pg9-img.png",
115
- # "tests/data/2305.03393v1-pg9.pdf",
116
- ]
117
-
118
- pipeline_options = VlmPipelineOptions() # artifacts_path="~/local_model_artifacts/"
119
- pipeline_options.generate_page_images = True
120
- # If force_backend_text = True, text from backend will be used instead of generated text
121
- pipeline_options.force_backend_text = False
122
-
123
-
124
- vlm_options = SmolDoclingOptions(
125
- # question="Convert this page to docling.",
126
- # load_in_8bit=True,
127
- # llm_int8_threshold=6.0,
128
- # quantized=False,
129
- )
130
-
131
- pipeline_options.vlm_options = vlm_options
132
-
133
- from docling_core.types.doc import DocItemLabel, ImageRefMode
134
- from docling_core.types.doc.document import DEFAULT_EXPORT_LABELS
135
-
136
- converter = DocumentConverter(
137
- format_options={
138
- InputFormat.PDF: PdfFormatOption(
139
- pipeline_cls=VlmPipeline,
140
- pipeline_options=pipeline_options,
141
- ),
142
- InputFormat.IMAGE: PdfFormatOption(
143
- pipeline_cls=VlmPipeline,
144
- pipeline_options=pipeline_options,
145
- ),
146
- }
147
- )
148
-
149
- out_path = Path("scratch")
150
- out_path.mkdir(parents=True, exist_ok=True)
151
 
152
- for source in sources:
153
- start_time = time.time()
154
- print("================================================")
155
- print("Processing... {}".format(source))
156
- print("================================================")
157
- print("")
158
 
159
- res = converter.convert(source)
 
 
160
 
161
- print("------------------------------------------------")
162
- print("MD:")
163
- print("------------------------------------------------")
164
- print("")
165
- print(res.document.export_to_markdown())
166
 
167
- # with (out_path / f"{res.input.file.stem}.html").open("w") as fp:
168
- # fp.write(res.document.export_to_html())
 
169
 
170
- res.document.save_as_html(
171
- filename=Path("{}/{}.html".format(out_path, res.input.file.stem)),
172
- image_mode=ImageRefMode.REFERENCED,
173
- labels=[*DEFAULT_EXPORT_LABELS, DocItemLabel.FOOTNOTE],
174
- )
175
 
176
- with (out_path / f"{res.input.file.stem}.json").open("w") as fp:
177
- fp.write(json.dumps(res.document.export_to_dict()))
 
178
 
179
- with (out_path / f"{res.input.file.stem}.yaml").open("w") as fp:
180
- fp.write(yaml.safe_dump(res.document.export_to_dict()))
181
-
182
- pg_num = res.document.num_pages()
183
-
184
- print("")
185
- inference_time = time.time() - start_time
186
- print(
187
- f"Total document prediction time: {inference_time:.2f} seconds, pages: {pg_num}"
188
- )
189
-
190
- print("================================================")
191
- print("done!")
192
- print("================================================")
193
- ```
194
 
 
94
  print(generated_texts[0])
95
  ```
96
 
97
+ #### Using Docling Example [here]()
98
 
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
+ ## Supported Instructions
 
 
 
 
 
102
 
103
+ ### 💻 Code
104
+ - `Convert code to text`
105
+ - `<code>`
106
 
107
+ ### 📊 Chart
108
+ - `Convert chart to table.`
109
+ - `<chart>`
 
 
110
 
111
+ ### 🔢 Formula
112
+ - `Convert formula to LaTeX.`
113
+ - `<formula>`
114
 
115
+ ### 📄 Docling
116
+ - `Convert this page to Docling.`
 
 
 
117
 
118
+ ### 📑 Table
119
+ - `Convert table to OTSL.`
120
+ - `<otsl>`
121
 
122
+ ### 🛠️ No-Code Actions/Pipelines
123
+ - `OCR the text in a specific location: <loc_155><loc_233><loc_206><loc_237>`
124
+ - `Identify element at: <loc_247><loc_482><loc_252><loc_486>`
125
+ - `Find all 'text' elements on the page.`, `Retrieve all section headers.`
126
+ - `Detect footer elements on the page.`
127
+ - More *Coming soon!* 🚧
 
 
 
 
 
 
 
 
 
128