# Spaces: Sleeping  (status banner captured together with the file; not program code)
import os
import subprocess

import streamlit as st
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, AutoConfig
# Install flash-attn when the app starts.
# FLASH_ATTENTION_SKIP_CUDA_BUILD is layered on top of the inherited
# environment: a bare dict passed as `env` replaces the child's entire
# environment, so PATH, HOME, proxy settings, etc. would be lost.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)
# Function to load the model and processor
@st.cache_resource
def load_model_and_processor():
    """Load the Lutece-Vision-Base model and its processor.

    The configuration is fetched from "microsoft/Florence-2-base-ft", its
    vision model type is set to "davit", and that configuration is passed to
    both ``from_pretrained`` calls for "sujet-ai/Lutece-Vision-Base".

    The function is wrapped in ``st.cache_resource`` because Streamlit
    re-executes the script on every user interaction; without the cache the
    model would be loaded again on each rerun.

    Returns:
        tuple: ``(model, processor)``, with the model switched to eval mode.
    """
    config = AutoConfig.from_pretrained("microsoft/Florence-2-base-ft", trust_remote_code=True)
    # Override the vision backbone type before the config is handed to the loaders.
    config.vision_config.model_type = "davit"
    model = AutoModelForCausalLM.from_pretrained(
        "sujet-ai/Lutece-Vision-Base", config=config, trust_remote_code=True
    ).eval()
    processor = AutoProcessor.from_pretrained(
        "sujet-ai/Lutece-Vision-Base", config=config, trust_remote_code=True
    )
    return model, processor
# Function to generate answer
def generate_answer(model, processor, image, prompt, *, task="<FinanceQA>",
                    max_new_tokens=1024, num_beams=3):
    """Generate an answer to ``prompt`` about ``image``.

    Args:
        model: Object exposing ``generate(...)``.
        processor: Callable that builds the model inputs and also exposes
            ``batch_decode`` and ``post_process_generation``.
        image: Image object; its ``width`` and ``height`` attributes are
            passed to the post-processing step as ``image_size``.
        prompt: Question text handed to the processor as ``text``.
        task: Task token given to ``post_process_generation`` and used as
            the key into its result. Defaults to ``"<FinanceQA>"``.
        max_new_tokens: Upper bound on generated tokens. Defaults to 1024.
        num_beams: Number of beams for generation. Defaults to 3.

    Returns:
        The entry stored under ``task`` in the post-processed result.
    """
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=max_new_tokens,
        do_sample=False,  # sampling stays disabled, as in the original call
        num_beams=num_beams,
    )
    # Special tokens are kept in the decoded text that is passed on to
    # post_process_generation.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text, task=task, image_size=(image.width, image.height)
    )
    return parsed_answer[task]
# Streamlit app
def main():
    """Render the demo page: upload a document image, ask a question, show the answer.

    The model and processor come from ``load_model_and_processor`` and the
    answer from ``generate_answer``. Nothing below the uploader is drawn until
    a file has been uploaded. An upload that cannot be read as an image is
    reported with ``st.error``, and pressing the button with a blank question
    produces a warning instead of silently doing nothing.
    """
    # NOTE(review): the non-ASCII characters in the UI strings of this function
    # look like mis-decoded emoji. They are kept exactly as found because the
    # intended characters cannot be recovered with certainty -- confirm against
    # the original file.
    st.set_page_config(
        page_title="Lutece-Vision-Base Demo",
        page_icon="πΌ",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # Title and description
    st.title("πΌ Lutece-Vision-Base Demo")
    st.markdown("Upload a financial document and ask questions about it!")

    # Sidebar with SujetAI watermark
    st.sidebar.image("sujetAI.svg", use_column_width=True)
    st.sidebar.markdown("---")
    st.sidebar.markdown("Our website : [sujet.ai](https://sujet.ai)")

    # Load model and processor
    model, processor = load_model_and_processor()

    # File uploader for document
    uploaded_file = st.file_uploader("π Upload a financial document", type=["png", "jpg", "jpeg"])
    if uploaded_file is None:
        return

    # A file with an accepted extension can still be corrupt or not an image;
    # Pillow reports that with OSError (or a subclass of it).
    try:
        image = Image.open(uploaded_file).convert('RGB')
    except OSError:
        st.error("The uploaded file could not be read as an image. Please upload a valid PNG or JPEG file.")
        return

    # Two-column layout
    col1, col2 = st.columns(2)

    with col1:
        # Display image with controlled size
        st.image(image, caption="Uploaded Document", use_column_width=True)

    with col2:
        # Question input
        question = st.text_input("β Ask a question about the document", "")
        submit_button = st.button("π Generate Answer")

    # Answer section spanning both columns
    if not submit_button:
        return

    # A whitespace-only question is treated the same as an empty one.
    question = question.strip()
    if not question:
        st.warning("Please enter a question before generating an answer.")
        return

    with st.spinner("Generating answer..."):
        answer = generate_answer(model, processor, image, question)
    st.success("Answer generated!")
    st.markdown("## π‘ Answer")
    st.markdown(answer)
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()