File size: 3,387 Bytes
d5b5b3a
 
 
 
 
 
 
 
 
 
 
 
dd4319f
d5b5b3a
dd4319f
d5b5b3a
 
dd4319f
d5b5b3a
dd4319f
d5b5b3a
 
 
 
 
 
 
 
 
 
 
 
 
fffc505
d5b5b3a
 
 
041332b
d5b5b3a
041332b
d5b5b3a
 
041332b
 
d5b5b3a
 
 
dd4319f
d5b5b3a
 
 
 
 
 
fffc505
 
 
 
 
 
 
 
 
 
 
 
d5b5b3a
041332b
 
 
 
 
 
d5b5b3a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import subprocess
import sys

import streamlit as st
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, AutoConfig

# Install flash-attn at startup (Hugging Face Spaces workaround: the package
# can't go in requirements.txt because its build probes for CUDA at install
# time). FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE skips compiling the CUDA
# kernels, which is required on a CPU-only host.
# NOTE: merge with os.environ — passing a bare one-key dict as `env` would
# wipe PATH/HOME and break pip. Use the current interpreter's pip and a list
# argv (shell=False) instead of a shell string.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,  # best-effort: the demo can still start if the install fails
)

# Function to load the model and processor
@st.cache_resource
def load_model_and_processor():
    """Download and cache the Lutece-Vision-Base model and its processor.

    Wrapped in st.cache_resource so the slow download/initialisation runs
    once per server process. Returns a (model, processor) tuple with the
    model switched to eval mode for inference.
    """
    repo = "sujet-ai/Lutece-Vision-Base"
    base_config = AutoConfig.from_pretrained("microsoft/Florence-2-base-ft", trust_remote_code=True)
    # The fine-tuned checkpoint expects the DaViT vision backbone.
    base_config.vision_config.model_type = "davit"
    processor = AutoProcessor.from_pretrained(repo, config=base_config, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(repo, config=base_config, trust_remote_code=True).eval()
    return model, processor

# Function to generate answer
def generate_answer(model, processor, image, prompt):
    """Run the <FinanceQA> task on a document image and return the parsed answer.

    model, processor: the pair returned by load_model_and_processor().
    image: PIL image of the uploaded document (used for pixel values and to
        pass the image size to post-processing).
    prompt: the user's free-text question.
    """
    task_token = "<FinanceQA>"
    batch = processor(text=prompt, images=image, return_tensors="pt")
    # Deterministic decoding: beam search, no sampling.
    output_ids = model.generate(
        input_ids=batch["input_ids"],
        pixel_values=batch["pixel_values"],
        max_new_tokens=1024,
        do_sample=False,
        num_beams=3,
    )
    # Keep special tokens: post_process_generation needs them to parse the task output.
    raw_text = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(
        raw_text, task=task_token, image_size=(image.width, image.height)
    )
    return parsed[task_token]

# Streamlit app
def main():
    """Render the demo UI: sidebar, document uploader, question box, answer."""
    st.set_page_config(
        page_title="Lutece-Vision-Base Demo",
        page_icon="πŸ—Ό",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # Header
    st.title("πŸ—Ό Lutece-Vision-Base Demo")
    st.markdown("Please keep in mind that inference might be slower since this Huggingface space is running on CPU only.")

    # Sidebar with SujetAI watermark
    st.sidebar.image("sujetAI.svg", use_column_width=True)
    st.sidebar.markdown("---")
    st.sidebar.markdown("Sujet AI, a Paris-based AI startup, is on a noble mission to democratize investment opportunities by leveraging built-in models and cutting-edge technologies. Committed to open-sourcing its technology, Sujet AI aims to contribute to the research and development communities, ultimately serving the greater good of humanity.")
    st.sidebar.markdown("---")
    st.sidebar.markdown("Our website : [sujet.ai](https://sujet.ai)")

    # Load model and processor (cached after the first run)
    model, processor = load_model_and_processor()

    # File uploader for document
    uploaded_file = st.file_uploader("πŸ“„ Upload a financial document", type=["png", "jpg", "jpeg"])
    if uploaded_file is None:
        return  # nothing more to render until a document is provided

    document = Image.open(uploaded_file).convert('RGB')

    # Two-column layout: document preview on the left, Q&A on the right
    left, right = st.columns(2)
    with left:
        st.image(document, caption="Uploaded Document", use_column_width=True)
    with right:
        question = st.text_input("❓ Ask a question about the document", "")
        # st.button must be called unconditionally so the widget renders;
        # the `and` only gates the inference.
        if st.button("πŸ” Generate Answer") and question:
            with st.spinner("Generating answer..."):
                answer = generate_answer(model, processor, document, question)
            st.success(f"## πŸ’‘ {answer}")


# Entry point when executed directly (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()