Tulika2000 committed on
Commit
71b876d
·
verified ·
1 Parent(s): 3e07c38

Delete summarization.py

Browse files
Files changed (1) hide show
  1. summarization.py +0 -75
summarization.py DELETED
@@ -1,75 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """Summarization.ipynb
3
-
4
- Automatically generated by Colab.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1XblbxoRxB4XOHixjGij789FPD9KjKdhi
8
- """
9
-
10
# NOTE: the notebook's "!pip install ..." lines are IPython shell magics and
# raise a SyntaxError in a plain .py module. Install the dependencies from a
# shell (or list them in requirements.txt) instead:
#
#   pip install groq PyPDF2 gradio
#   pip install -U langchain langchain-community langchain-groq
13
-
14
- import os
15
- import PyPDF2
16
- import gradio as gr
17
- from langchain_groq.chat_models import ChatGroq
18
-
19
# Read the Groq credential from the environment rather than hard-coding it.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Fail fast at import time when the key is missing or empty.
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set. Add it in Hugging Face Secrets.")

# Chat model client used for summarization (Groq model id: Mixtral 8x7B).
llm = ChatGroq(model_name="mixtral-8x7b-32768")
28
-
29
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of all pages in ``pdf_file``.

    ``pdf_file`` is passed unchanged to ``PyPDF2.PdfReader``. Pages whose
    ``extract_text()`` result is empty or ``None`` are skipped; the text of
    every other page is followed by a newline.
    """
    document = PyPDF2.PdfReader(pdf_file)
    extracted = (page.extract_text() for page in document.pages)
    return "".join(chunk + "\n" for chunk in extracted if chunk)
38
-
39
def summarize_text(text, max_chars=10000):
    """Ask the module-level ``llm`` for a summary of ``text``.

    Args:
        text: Document text to summarize.
        max_chars: Number of leading characters of ``text`` placed in the
            prompt. Defaults to 10000, the limit that was previously
            hard-coded here.

    Returns:
        The model's reply text.
    """
    prompt = f"Summarize the following document:\n\n{text[:max_chars]}"
    # ``predict`` is deprecated in LangChain; ``invoke`` is its replacement and
    # returns a message object whose ``content`` attribute holds the reply.
    return llm.invoke(prompt).content
44
-
45
def process_pdf(file):
    """Extract the text of an uploaded PDF and return a summary of it.

    Args:
        file: The value supplied by the ``gr.File`` input, or ``None`` when
            nothing was uploaded. It is forwarded as-is to
            ``extract_text_from_pdf``.

    Returns:
        The summary produced by ``summarize_text``, or a short explanatory
        message when there is no file or no text could be extracted.
    """
    if file is None:
        return "No file uploaded."

    # Reuse the shared extraction helper instead of repeating its page loop.
    text = extract_text_from_pdf(file)

    # Nothing was extracted (for example, a PDF without a text layer):
    # report that instead of sending an empty prompt to the API.
    if not text.strip():
        return "No extractable text found in the PDF."

    # No truncation here: summarize_text() already caps the prompt size.
    return summarize_text(text)
64
-
65
# Gradio UI: a single PDF upload as input, the summary as plain-text output.
interface = gr.Interface(
    title="📄 PDF Summarizer",
    description="Upload a PDF file and get a summary",
    fn=process_pdf,
    inputs=gr.File(label="Upload a PDF"),
    outputs="text",
)

# Start serving the app.
interface.launch()