Spaces:
Sleeping
Sleeping
File size: 3,194 Bytes
f1691d8 026783f 0dd7ae7 162dd8b f1691d8 162dd8b f1691d8 0dd7ae7 f1691d8 8639815 f6a07f3 f1691d8 f6a07f3 f1691d8 162dd8b 026783f 162dd8b 026783f f1691d8 b9c9dac a6ce7fc f1691d8 a6ce7fc f1691d8 b9c9dac f1691d8 162dd8b 8639815 162dd8b b9c9dac 162dd8b f1691d8 162dd8b f1691d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import PyPDF2
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_core.documents import Document
from pathlib import Path
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
# Hosted text-generation endpoint used for summarization.
# The original call passed `task=` twice ("text-generation" and
# "summary_text"); a repeated keyword argument is a SyntaxError, so only the
# text-generation task is kept.
# NOTE(review): do_sample=False next to a non-default temperature looks
# contradictory — confirm which decoding behaviour is intended.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=4096,
    temperature=0.5,
    do_sample=False,
)

# Chat-style wrapper around the endpoint; summarize() calls .invoke() on it.
llm_engine_hf = ChatHuggingFace(llm=llm)
def read_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Location of the PDF; passed straight to
            ``PyPDF2.PdfReader``.

    Returns:
        A single string with the extracted text of all pages (empty when no
        page yields any text).
    """
    pdf_reader = PyPDF2.PdfReader(file_path)
    # A page may yield no extractable text; coerce a falsy result to "" so
    # the join cannot fail on a non-string value.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Prompt pieces. Joined without the length hint they reproduce the original
# prompt text exactly.
_SUMMARY_PROMPT_HEAD = (
    "\n"
    "Your task is to summarize a long text into a detailed summary.\n"
    "Start with an introductory sentence and then summarize the main points in a logical order, ensuring the summary is very detailed.\n"
)
_SUMMARY_PROMPT_LENGTH_HINT = "Aim for a summary of about {N_WORDS} words.\n"
_SUMMARY_PROMPT_TAIL = (
    "Here is the long text to summarize:\n"
    "Text:\n"
    "{TEXT}\n"
)


def summarize(file, n_words=None):
    """Summarize an uploaded PDF or UTF-8 text file with the configured LLM.

    Args:
        file: The uploaded file: either an object exposing the path as
            ``.name`` or a plain path string.
        n_words: Optional target length of the summary in words. When it is
            ``None`` or 0 the prompt carries no length instruction.

    Returns:
        The ``content`` of the model's reply.

    Raises:
        gr.Error: If no file was provided.
    """
    if file is None:
        # Without this guard a click with no upload fails on `file.name`.
        raise gr.Error("Please upload a file before summarizing.")

    # Accept both file-like uploads (path in `.name`) and bare path strings.
    file_path = str(getattr(file, "name", file))

    # Compare case-insensitively so "REPORT.PDF" is not read as UTF-8 text.
    if file_path.lower().endswith('.pdf'):
        text = read_pdf(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()

    # Build the prompt; the length hint is added only when a target is given,
    # so every template variable that is passed is actually used.
    template = _SUMMARY_PROMPT_HEAD
    variables = {'TEXT': text}
    if n_words:
        template += _SUMMARY_PROMPT_LENGTH_HINT
        # UI number inputs may deliver a float; show a whole word count.
        variables['N_WORDS'] = int(n_words)
    template += _SUMMARY_PROMPT_TAIL

    prompt = PromptTemplate(template=template, input_variables=list(variables))
    formatted_prompt = prompt.format(**variables)

    # NOTE: the whole document is sent in one request; very long inputs may
    # exceed what the model accepts.
    output_summary = llm_engine_hf.invoke(formatted_prompt)
    return output_summary.content
def download_summary(output_text):
    """Save the summary to ``summary.txt`` and return where it was written.

    Args:
        output_text: Summary text to persist.

    Returns:
        ``Path('summary.txt')`` after writing the text as UTF-8, or ``None``
        when ``output_text`` is empty or otherwise falsy (nothing is written).
    """
    # Nothing to save: leave any existing file untouched.
    if not output_text:
        return None

    destination = Path('summary.txt')
    with destination.open('w', encoding='utf-8') as handle:
        handle.write(output_text)
    return destination
def create_download_file(summary_text):
    """Write the summary to disk and return the file location as a string.

    Args:
        summary_text: Summary text handed to ``download_summary``.

    Returns:
        The written file's path as ``str``, or ``None`` when
        ``download_summary`` produced no file.
    """
    saved_path = download_summary(summary_text)
    if not saved_path:
        return None
    return str(saved_path)
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Document Summarizer")

    with gr.Row():
        with gr.Column():
            file = gr.File(label="Submit a file")
            # Left empty by default, so the handler receives None for the
            # length unless the user enters a value.
            n_words = gr.Number(
                label="Target summary length in words (optional)",
                precision=0,
            )
        with gr.Column():
            output_text = gr.Textbox(label="Summary", lines=20)

    submit_button = gr.Button("Summarize")
    # summarize() takes the file and a word count; pass both so the handler
    # is not called with a missing argument.
    submit_button.click(summarize, inputs=[file, n_words], outputs=output_text)

    download_button = gr.Button("Download Summary")
    # Named output component that receives the path of the written file.
    download_file = gr.File()
    download_button.click(
        fn=create_download_file,
        inputs=[output_text],
        outputs=download_file,
    )

# Run the Gradio app
demo.launch(share=True)