# Spaces status: Runtime error
import streamlit as st | |
import pandas as pd | |
from transformers import pipeline | |
import base64 | |
def upload_and_extract_text():
    """Show a PDF uploader and run the uploaded file through the model pipeline.

    Returns:
        The ``generated_text`` field of the first pipeline result, or ``None``
        when no file has been uploaded yet.
    """
    uploaded_file = st.file_uploader("Upload PDF file", type=["pdf"])
    if uploaded_file is None:
        # Nothing uploaded yet: return None explicitly so the caller's
        # ``is not None`` check skips the rest of the page.
        return None

    # NOTE(review): the raw bytes of the PDF are handed to a
    # text2text-generation pipeline. That task presumably expects plain text,
    # so this likely fails or produces meaningless output; a real PDF text
    # extraction / OCR step appears to be needed here -- TODO confirm.
    # NOTE(review): the pipeline is rebuilt on every call; consider caching it.
    ocr_pipeline = pipeline("text2text-generation", model="google/t5-v1_1-large")
    pdf_bytes = uploaded_file.read()
    results = ocr_pipeline(pdf_bytes, max_length=1024, do_sample=False)
    return results[0]["generated_text"]
def text_to_dataframe(text: str) -> pd.DataFrame:
    """Return a one-column DataFrame with one row per line of *text*.

    Lines are split with ``str.splitlines`` so that ``\\r\\n`` line endings do
    not leave a stray ``\\r`` in the cells and a trailing newline does not
    produce a spurious empty last row. Blank lines inside the text are kept.

    Args:
        text: The text to split into rows.

    Returns:
        A DataFrame with a single ``"Text"`` column; it has zero rows when
        *text* is empty.
    """
    return pd.DataFrame({"Text": text.splitlines()})
def main():
    """Render the page: upload a PDF, preview its text, and offer a CSV download."""
    st.title("PDF to Spreadsheet Converter")
    st.write("Upload a PDF file to extract text and save it as a spreadsheet.")

    extracted_text = upload_and_extract_text()
    if extracted_text is None:
        # No upload yet; there is nothing further to render.
        return

    st.write("### Extracted Text")
    st.write(extracted_text)

    df = text_to_dataframe(extracted_text)
    st.write("### Spreadsheet Preview")
    st.write(df)

    # Embed the CSV in a base64 data URI so the browser can download it
    # straight from the link. ``text/csv`` is the registered media type for
    # CSV, and the charset matches the UTF-8 encoding applied below.
    csv_text = df.to_csv(index=False)
    b64 = base64.b64encode(csv_text.encode("utf-8")).decode("ascii")
    href = (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        'download="extracted_text.csv">Download Extracted Text as CSV</a>'
    )
    st.markdown(href, unsafe_allow_html=True)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()