Spaces:
Runtime error
Runtime error
Commit
·
ef4c284
1
Parent(s):
e37a87f
Update app.py
Browse files
app.py
CHANGED
@@ -192,11 +192,11 @@ def predict(
|
|
192 |
def proc_submission(
|
193 |
input_text: str,
|
194 |
model_name: str,
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
num_beams: int = 3,
|
201 |
max_input_length: int = 8182,
|
202 |
):
|
@@ -503,8 +503,6 @@ if __name__ == "__main__":
|
|
503 |
gr.Markdown("## Load Inputs & Select Parameters")
|
504 |
gr.Markdown(
|
505 |
"""Enter/paste text below, or upload a file. Pick a model & adjust params (_optional_), and press **Summarize!**
|
506 |
-
|
507 |
-
# See [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for details.
|
508 |
"""
|
509 |
)
|
510 |
with gr.Row(variant="compact"):
|
@@ -559,7 +557,7 @@ if __name__ == "__main__":
|
|
559 |
# gr.Markdown(
|
560 |
# "_Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios._"
|
561 |
# )
|
562 |
-
output_text = gr.HTML("<p><em>
|
563 |
with gr.Column():
|
564 |
gr.Markdown("### Results & Scores")
|
565 |
with gr.Row():
|
@@ -587,81 +585,81 @@ if __name__ == "__main__":
|
|
587 |
label="Summary",
|
588 |
value="<center><i>Summary will appear here!</i></center>",
|
589 |
)
|
590 |
-
with gr.Column():
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
|
615 |
gr.Markdown("---")
|
616 |
-
with gr.Column():
|
617 |
-
|
618 |
-
|
619 |
-
|
620 |
-
|
621 |
-
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
-
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
630 |
-
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
|
645 |
-
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
with gr.Column():
|
654 |
-
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
|
663 |
-
|
664 |
-
|
665 |
|
666 |
# load_examples_button.click(
|
667 |
# fn=load_single_example_text, inputs=[example_name], outputs=[input_text]
|
|
|
192 |
def proc_submission(
|
193 |
input_text: str,
|
194 |
model_name: str,
|
195 |
+
predrop_stopwords: bool = False,
|
196 |
+
repetition_penalty: float = 0.5,
|
197 |
+
no_repeat_ngram_size: int = 3,
|
198 |
+
length_penalty: float = 1.5,
|
199 |
+
token_batch_length: int = 1530,
|
200 |
num_beams: int = 3,
|
201 |
max_input_length: int = 8182,
|
202 |
):
|
|
|
503 |
gr.Markdown("## Load Inputs & Select Parameters")
|
504 |
gr.Markdown(
|
505 |
"""Enter/paste text below, or upload a file. Pick a model & adjust params (_optional_), and press **Summarize!**
|
|
|
|
|
506 |
"""
|
507 |
)
|
508 |
with gr.Row(variant="compact"):
|
|
|
557 |
# gr.Markdown(
|
558 |
# "_Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios._"
|
559 |
# )
|
560 |
+
output_text = gr.HTML("<p><em>Summarization should take ~1-2 minutes for most settings, but may extend up to 5-10 minutes in some scenarios.</em></p>")
|
561 |
with gr.Column():
|
562 |
gr.Markdown("### Results & Scores")
|
563 |
with gr.Row():
|
|
|
585 |
label="Summary",
|
586 |
value="<center><i>Summary will appear here!</i></center>",
|
587 |
)
|
588 |
+
# with gr.Column():
|
589 |
+
# gr.Markdown("### **Aggregate Summary Batches**")
|
590 |
+
# gr.Markdown(
|
591 |
+
# "_Note: this is an experimental feature. Feedback welcome in the [discussions](https://hf.co/spaces/pszemraj/document-summarization/discussions)!_"
|
592 |
+
# )
|
593 |
+
# with gr.Row():
|
594 |
+
# aggregate_button = gr.Button(
|
595 |
+
# "Aggregate!",
|
596 |
+
# variant="primary",
|
597 |
+
# )
|
598 |
+
# gr.Markdown(
|
599 |
+
# f"""Aggregate the above batches into a cohesive summary.
|
600 |
+
# - A secondary instruct-tuned LM consolidates info
|
601 |
+
# - Current model: [{AGGREGATE_MODEL}](https://hf.co/{AGGREGATE_MODEL})
|
602 |
+
# """
|
603 |
+
# )
|
604 |
+
# with gr.Column(variant="panel"):
|
605 |
+
# aggregated_summary = gr.HTML(
|
606 |
+
# label="Aggregate Summary",
|
607 |
+
# value="<center><i>Aggregate summary will appear here!</i></center>",
|
608 |
+
# )
|
609 |
+
# gr.Markdown(
|
610 |
+
# "\n\n_Aggregate summary is also appended to the bottom of the `.txt` file._"
|
611 |
+
# )
|
612 |
|
613 |
gr.Markdown("---")
|
614 |
+
# with gr.Column():
|
615 |
+
# gr.Markdown("### Advanced Settings")
|
616 |
+
# gr.Markdown(
|
617 |
+
# "Refer to [the guide doc](https://gist.github.com/pszemraj/722a7ba443aa3a671b02d87038375519) for what these are, and how they impact _quality_ and _speed_."
|
618 |
+
# )
|
619 |
+
# with gr.Row(variant="compact"):
|
620 |
+
# length_penalty = gr.Slider(
|
621 |
+
# minimum=0.3,
|
622 |
+
# maximum=1.1,
|
623 |
+
# label="length penalty",
|
624 |
+
# value=0.7,
|
625 |
+
# step=0.05,
|
626 |
+
# )
|
627 |
+
# token_batch_length = gr.Radio(
|
628 |
+
# choices=TOKEN_BATCH_OPTIONS,
|
629 |
+
# label="token batch length",
|
630 |
+
# # select median option
|
631 |
+
# value=TOKEN_BATCH_OPTIONS[len(TOKEN_BATCH_OPTIONS) // 2],
|
632 |
+
# )
|
633 |
+
|
634 |
+
# with gr.Row(variant="compact"):
|
635 |
+
# repetition_penalty = gr.Slider(
|
636 |
+
# minimum=1.0,
|
637 |
+
# maximum=5.0,
|
638 |
+
# label="repetition penalty",
|
639 |
+
# value=1.5,
|
640 |
+
# step=0.1,
|
641 |
+
# )
|
642 |
+
# no_repeat_ngram_size = gr.Radio(
|
643 |
+
# choices=[2, 3, 4, 5],
|
644 |
+
# label="no repeat ngram size",
|
645 |
+
# value=3,
|
646 |
+
# )
|
647 |
+
# predrop_stopwords = gr.Checkbox(
|
648 |
+
# label="Drop Stopwords (Pre-Truncation)",
|
649 |
+
# value=False,
|
650 |
+
# )
|
651 |
+
# with gr.Column():
|
652 |
+
# gr.Markdown("## About")
|
653 |
+
# gr.Markdown(
|
654 |
+
# "- Models are fine-tuned on the [🅱️ookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that generalizes well and is useful for summarizing text in academic and everyday use."
|
655 |
+
# )
|
656 |
+
# gr.Markdown(
|
657 |
+
# "- _Update April 2023:_ Additional models fine-tuned on the [PLOS](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-plos-norm) and [ELIFE](https://hf.co/datasets/pszemraj/scientific_lay_summarisation-elife-norm) subsets of the [scientific lay summaries](https://arxiv.org/abs/2210.09932) dataset are available (see dropdown at the top)."
|
658 |
+
# )
|
659 |
+
# gr.Markdown(
|
660 |
+
# "Adjust the max input words & max PDF pages for OCR by duplicating this space and [setting the environment variables](https://hf.co/docs/hub/spaces-overview#managing-secrets) `APP_MAX_WORDS` and `APP_OCR_MAX_PAGES` to the desired integer values."
|
661 |
+
# )
|
662 |
+
# gr.Markdown("---")
|
663 |
|
664 |
# load_examples_button.click(
|
665 |
# fn=load_single_example_text, inputs=[example_name], outputs=[input_text]
|