File size: 3,387 Bytes
d5b5b3a
 
 
 
 
 
 
 
 
 
 
 
dd4319f
d5b5b3a
dd4319f
d5b5b3a
 
dd4319f
d5b5b3a
dd4319f
d5b5b3a
 
 
 
 
 
 
 
 
 
 
 
 
fffc505
d5b5b3a
 
 
041332b
d5b5b3a
041332b
d5b5b3a
 
041332b
 
d5b5b3a
 
 
dd4319f
d5b5b3a
 
 
 
 
 
fffc505
 
 
 
 
 
 
 
 
 
 
 
d5b5b3a
041332b
 
 
 
 
 
d5b5b3a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import subprocess
import sys

import streamlit as st
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, AutoConfig

# Install flash-attn at startup (Hugging Face Spaces workaround: the package
# can't go in requirements.txt because its build probes for CUDA at install
# time). FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE skips compiling the CUDA
# kernels, which is required on a CPU-only host.
# NOTE: merge with os.environ — passing a bare one-key dict as `env` would
# wipe PATH/HOME and break pip. Use the current interpreter's pip and a list
# argv (shell=False) instead of a shell string.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,  # best-effort: the demo can still start if the install fails
)

# Function to load the model and processor
@st.cache_resource
def load_model_and_processor():
    """Download and cache the Lutece-Vision-Base model and its processor.

    Wrapped in st.cache_resource so the slow download/initialisation runs
    once per server process. Returns a (model, processor) tuple with the
    model switched to eval mode for inference.
    """
    repo = "sujet-ai/Lutece-Vision-Base"
    base_config = AutoConfig.from_pretrained("microsoft/Florence-2-base-ft", trust_remote_code=True)
    # The fine-tuned checkpoint expects the DaViT vision backbone.
    base_config.vision_config.model_type = "davit"
    processor = AutoProcessor.from_pretrained(repo, config=base_config, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(repo, config=base_config, trust_remote_code=True).eval()
    return model, processor

# Function to generate answer
def generate_answer(model, processor, image, prompt):
    """Run the <FinanceQA> task on a document image and return the parsed answer.

    model, processor: the pair returned by load_model_and_processor().
    image: PIL image of the uploaded document (used for pixel values and to
        pass the image size to post-processing).
    prompt: the user's free-text question.
    """
    task_token = "<FinanceQA>"
    batch = processor(text=prompt, images=image, return_tensors="pt")
    # Deterministic decoding: beam search, no sampling.
    output_ids = model.generate(
        input_ids=batch["input_ids"],
        pixel_values=batch["pixel_values"],
        max_new_tokens=1024,
        do_sample=False,
        num_beams=3,
    )
    # Keep special tokens: post_process_generation needs them to parse the task output.
    raw_text = processor.batch_decode(output_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(
        raw_text, task=task_token, image_size=(image.width, image.height)
    )
    return parsed[task_token]

# Streamlit app
def main():
    """Render the demo UI: sidebar, document uploader, question box, answer."""
    st.set_page_config(
        page_title="Lutece-Vision-Base Demo",
        page_icon="πŸ—Ό",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # Header
    st.title("πŸ—Ό Lutece-Vision-Base Demo")
    st.markdown("Please keep in mind that inference might be slower since this Huggingface space is running on CPU only.")

    # Sidebar with SujetAI watermark
    st.sidebar.image("sujetAI.svg", use_column_width=True)
    st.sidebar.markdown("---")
    st.sidebar.markdown("Sujet AI, a Paris-based AI startup, is on a noble mission to democratize investment opportunities by leveraging built-in models and cutting-edge technologies. Committed to open-sourcing its technology, Sujet AI aims to contribute to the research and development communities, ultimately serving the greater good of humanity.")
    st.sidebar.markdown("---")
    st.sidebar.markdown("Our website : [sujet.ai](https://sujet.ai)")

    # Load model and processor (cached after the first run)
    model, processor = load_model_and_processor()

    # File uploader for document
    uploaded_file = st.file_uploader("πŸ“„ Upload a financial document", type=["png", "jpg", "jpeg"])
    if uploaded_file is None:
        return  # nothing more to render until a document is provided

    document = Image.open(uploaded_file).convert('RGB')

    # Two-column layout: document preview on the left, Q&A on the right
    left, right = st.columns(2)
    with left:
        st.image(document, caption="Uploaded Document", use_column_width=True)
    with right:
        question = st.text_input("❓ Ask a question about the document", "")
        # st.button must be called unconditionally so the widget renders;
        # the `and` only gates the inference.
        if st.button("πŸ” Generate Answer") and question:
            with st.spinner("Generating answer..."):
                answer = generate_answer(model, processor, document, question)
            st.success(f"## πŸ’‘ {answer}")


# Entry point when executed directly (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()