File size: 769 Bytes
e7cb6de
db61c57
 
e7cb6de
db61c57
 
 
 
 
 
 
 
 
e8ee5f1
db61c57
 
 
 
 
 
 
 
e7cb6de
37c61d6
db61c57
37c61d6
 
db61c57
 
37c61d6
db61c57
37c61d6
e7cb6de
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import gradio as gr
from haystack.nodes import PreProcessor
from haystack import Document

# Shared PreProcessor instance used by `chunk` to clean the input text and
# split it into chunks.
preprocessor = PreProcessor(
    # Cleaning options: all built-in cleaners are enabled; no custom
    # substrings are stripped.
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    remove_substrings=None,
    # Splitting options: chunk size is counted in words (200 per chunk),
    # sentence boundaries are respected, and chunks do not overlap.
    split_by="word",
    split_length=200,
    split_respect_sentence_boundary=True,
    split_overlap=0,
    # Character-count limit passed to PreProcessor (see the Haystack
    # PreProcessor docs for how over-long chunks are handled).
    max_chars_check=10_000,
)

def chunk(text):
    """Split *text* into chunks and label them for ``gr.HighlightedText``.

    The text is wrapped in a Haystack ``Document`` and run through the
    module-level ``preprocessor``. Each resulting split is paired with a
    label cycling through ``"0"``, ``"1"``, ``"2"`` so that neighbouring
    chunks receive different labels.

    Args:
        text: The raw input string.

    Returns:
        A list of ``(chunk_text, label)`` tuples in split order. Empty or
        whitespace-only input yields an empty list.
    """
    if not text or not text.strip():
        # Nothing to split; skip the preprocessor entirely.
        return []

    # Pass a one-element list: handing PreProcessor.process a bare Document
    # is the deprecated call form in Haystack v1.
    splits = preprocessor.process([Document(text)])

    # HighlightedText expects (text, label) pairs. The label is a string so
    # it matches the "0"/"1"/"2" keys of the color_map and is treated as a
    # category rather than a numeric score.
    return [(split.content, str(i % 3)) for i, split in enumerate(splits)]

# Output widget: highlighted text spans with a legend; adjacent spans are
# kept separate rather than merged.
highlights = gr.HighlightedText(
    label="Highlights",
    combine_adjacent=False,
    show_legend=True,
    color_map={"0": "red", "1": "green", "2": "yellow"},
)

# Wire a plain text input through `chunk` into the highlighted output,
# then start the app.
iface = gr.Interface(fn=chunk, inputs="text", outputs=highlights)
iface.launch()