Lahiru Menikdiwela
committed on
Commit
·
c6d2349
1
Parent(s):
76446bf
fix input format issue
Browse files
- preprocess.py +2 -0
preprocess.py
CHANGED
@@ -21,6 +21,8 @@ def get_document_splits_from_text(text:str) -> Document:
|
|
21 |
|
22 |
|
23 |
def prepare_for_summarize(text:str,tokenizer):
|
|
|
|
|
24 |
no_input_tokens = get_input_token_count(text,tokenizer)
|
25 |
if no_input_tokens<12000:
|
26 |
text_to_summarize = text
|
|
|
21 |
|
22 |
|
23 |
def prepare_for_summarize(text:str,tokenizer):
|
24 |
+
#!!!!!!!!!!!!Only for small inputs
|
25 |
+
return text
|
26 |
no_input_tokens = get_input_token_count(text,tokenizer)
|
27 |
if no_input_tokens<12000:
|
28 |
text_to_summarize = text
|