Spaces:

mrsk1883
/

testingspace

Sleeping

App Files Files Community

mrsk1883 committed on Dec 8, 2023

Commit

e9382d2

1 Parent(s): 8e5bd23

Create app.py

Browse files

Files changed (1) hide show

app.py +63 -0

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from PyPDF2 import PdfReader
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+from gtts import gTTS
+from IPython.display import Audio
+# Download the model and tokenizer
+model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+def summarize_pdf_abstract(pdf_path):
+    """
+    Reads a PDF file, extracts the abstract, and summarizes it in one sentence.
+    Args:
+      pdf_path: Path to the PDF file.
+    Returns:
+      A string containing the one-sentence summary of the abstract.
+    """
+    # Read the PDF file
+    reader = PdfReader(open(pdf_path, 'rb'))
+    # Extract the abstract
+    abstract_text = ""
+    for page in reader.pages:
+        # Search for keywords like "Abstract" or "Introduction"
+        if "Abstract" in page.extract_text() or "Introduction" in page.extract_text():
+            # Extract the text following the keyword
+            abstract_text = page.extract_text()
+            break
+    # Encode the abstract text
+    inputs = tokenizer(abstract_text, return_tensors="pt")
+    # Generate the summary
+    outputs = model.generate(**inputs)
+    # Decode the summary
+    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return summary
+# Define the file path
+pdf_path = "/content/Article 11 Hidden Technical Debt in Machine Learning Systems.pdf"
+# Summarize the abstract
+summary = summarize_pdf_abstract(pdf_path)
+# Print the summary
+print("One-sentence summary of the abstract:")
+print(summary)
+# Choose your preferred language for the audio
+language = "en"
+# Generate audio file
+speech = gTTS(summary, lang=language)
+speech.save("summary.mp3")
+# Display the audio file
+Audio("summary.mp3")