# Page header captured along with this file (not part of the program): "Spaces: Sleeping"
import importlib.util
import json
import os
import subprocess
import sys

import streamlit as st
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, AutoConfig
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True) | |
# Function to load the model and processor | |
def load_model_and_processor(): | |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
config = AutoConfig.from_pretrained("microsoft/Florence-2-base-ft", trust_remote_code=True) | |
config.vision_config.model_type = "davit" | |
model = AutoModelForCausalLM.from_pretrained("sujet-ai/Lutece-Vision-Base", config=config, trust_remote_code=True).to(device).eval() | |
processor = AutoProcessor.from_pretrained("sujet-ai/Lutece-Vision-Base", config=config, trust_remote_code=True) | |
return model, processor, device | |
# Function to generate answer | |
def generate_answer(model, processor, device, image, prompt): | |
task = "<FinanceQA>" | |
inputs = processor(text=prompt, images=image, return_tensors="pt").to(device) | |
generated_ids = model.generate( | |
input_ids=inputs["input_ids"], | |
pixel_values=inputs["pixel_values"], | |
max_new_tokens=1024, | |
do_sample=False, | |
num_beams=3, | |
) | |
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0] | |
parsed_answer = processor.post_process_generation(generated_text, task=task, image_size=(image.width, image.height)) | |
return parsed_answer[task] | |
# Function to display config without nested expanders | |
def display_config(config, depth=0): | |
for key, value in config.items(): | |
if isinstance(value, dict): | |
st.markdown(f"{' ' * depth}**{key}**:") | |
display_config(value, depth + 1) | |
else: | |
st.markdown(f"{' ' * depth}{key}: {value}") | |
# Streamlit app | |
def main(): | |
st.set_page_config(page_title="Lutece-Vision-Base Demo", page_icon="πΌ", layout="wide", initial_sidebar_state="expanded") | |
# Title and description | |
st.title("πΌ Lutece-Vision-Base Demo") | |
st.markdown("Upload a financial document and ask questions about it!") | |
# Sidebar with SujetAI watermark | |
st.sidebar.image("sujetAI.svg", use_column_width=True) | |
st.sidebar.markdown("---") | |
st.sidebar.markdown("Our website : [sujet.ai](https://sujet.ai)") | |
# Load model and processor | |
model, processor, device = load_model_and_processor() | |
# File uploader for document | |
uploaded_file = st.file_uploader("π Upload a financial document", type=["png", "jpg", "jpeg"]) | |
if uploaded_file is not None: | |
image = Image.open(uploaded_file).convert('RGB') | |
st.image(image, caption="Uploaded Document", use_column_width=True) | |
# Question input | |
question = st.text_input("β Ask a question about the document", "") | |
if st.button("π Generate Answer"): | |
with st.spinner("Generating answer..."): | |
answer = generate_answer(model, processor, device, image, question) | |
st.success(f"## π‘ {answer}") | |
# # Model configuration viewer | |
# with st.expander("π§ Model Configuration"): | |
# config_dict = model.config.to_dict() | |
# display_config(config_dict) | |
if __name__ == "__main__": | |
main() |