Spaces:

tohoku-nlp
/

Sketch2Diagram

Running on L4

App Files Files Community

DaddyDaniel committed on Apr 22

Commit

979c542

1 Parent(s): 30f49a1

Add args to model

Browse files

User can select args for inference.
Inference logic moved to qwen2_inference.py

Files changed (3) hide show

main.py +0 -7
qwen2_inference.py +58 -0
sketch2diagram.py +16 -12

main.py CHANGED Viewed

@@ -1,11 +1,4 @@
 import streamlit as st
-from transformers import pipeline
-@st.cache_resource
-def get_model():
-    # Load the model here
-    model = pipeline("image-to-text", model="itsumi-st/imgtikz_qwen2vl")
-    return model
 st.logo("NLP_Group_logo.svg", size="large")
 main_page = st.Page("main_page.py", title="Main Page", icon="🏠")

 import streamlit as st
 st.logo("NLP_Group_logo.svg", size="large")
 main_page = st.Page("main_page.py", title="Main Page", icon="🏠")

qwen2_inference.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import streamlit as st
+import torch
+from PIL import Image
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+# Inference steps taken from https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct
+@st.cache_resource
+def _load_model(model_path):
+    """Load and cache the Qwen2-VL model and processor for ``model_path``.
+
+    Exceptions propagate to the caller so that a failed load is not stored
+    in the resource cache and can be retried on the next call.
+    """
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model_import = Qwen2VLForConditionalGeneration.from_pretrained(
+        model_path, torch_dtype="auto", device_map=device
+    )
+    processor_import = AutoProcessor.from_pretrained(model_path)
+    return model_import, processor_import
+def get_model(model_path):
+    """Return ``(model, processor)`` for ``model_path``.
+
+    Returns ``(None, None)`` and shows a Streamlit error if loading fails.
+    Only successful loads are cached; the failure result is produced here,
+    outside the cached function, so a transient error does not stick.
+    """
+    try:
+        with st.spinner(f"Loading model {model_path}"):
+            return _load_model(model_path)
+    except Exception as e:
+        # UI boundary: report the failure instead of crashing the app.
+        st.error(f"Error loading model: {e}")
+        return None, None
+def run_inference(input_file, model_path, args):
+    """Generate TikZ code for a sketch image with a Qwen2-VL model.
+
+    Args:
+        input_file: Path or file-like object readable by ``PIL.Image.open``.
+        model_path: Model identifier forwarded to ``get_model``.
+        args: Generation settings, either a dict (as built by the sidebar in
+            sketch2diagram.py) or an object exposing the same names as
+            attributes. Required: ``max_length``, ``top_p``, ``top_k``,
+            ``temperature``. Optional: ``seed``.
+
+    Returns:
+        A list with one decoded string per input sequence, or the string
+        ``"Error loading model."`` when the model could not be loaded.
+    """
+    def option(name):
+        # The UI passes a plain dict, for which attribute access would raise.
+        return args[name] if isinstance(args, dict) else getattr(args, name)
+
+    model, processor = get_model(model_path)
+    if model is None or processor is None:
+        return "Error loading model."
+    image = Image.open(input_file)
+    conversation = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image"},
+                {"type": "text", "text": "Please generate TikZ code to draw the diagram of the given image."}
+            ],
+        }
+    ]
+    text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+    # Follow the model instead of hard-coding "cuda": get_model falls back to
+    # CPU when no GPU is available.
+    inputs = processor(
+        text=[text_prompt], images=[image], padding=True, return_tensors="pt"
+    ).to(model.device)
+
+    # Apply the user-selected seed, if any, so sampling is reproducible.
+    seed = args.get("seed") if isinstance(args, dict) else getattr(args, "seed", None)
+    if seed is not None:
+        torch.manual_seed(int(seed))
+
+    generation_kwargs = {
+        "max_new_tokens": option("max_length"),
+        "num_return_sequences": 1,
+    }
+    temperature = option("temperature")
+    if temperature > 0:
+        generation_kwargs.update(
+            do_sample=True,
+            top_p=option("top_p"),
+            top_k=option("top_k"),
+            temperature=temperature,
+        )
+    else:
+        # Sampling needs a strictly positive temperature; treat 0 as greedy.
+        generation_kwargs["do_sample"] = False
+    output_ids = model.generate(**inputs, **generation_kwargs)
+    # Drop the prompt tokens so only the newly generated part is decoded.
+    generated_ids = [
+        sequence[len(prompt_ids):]
+        for prompt_ids, sequence in zip(inputs.input_ids, output_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
+    )
+    return output_text

sketch2diagram.py CHANGED Viewed

@@ -1,12 +1,19 @@
 import streamlit as st
-from PIL import Image
-from main import get_model
 # Sidebar Setup
 st.sidebar.title("Model Configuration")
-inference_strat = st.sidebar.selectbox("Inference Strategy", ["Iterative", "Multi-candidate"],
-                                       help="Choose the inference strategy for the model. Iterative generates one candidate at a time until an output compiles, while Multi-candidate generates multiple candidates in parallel.")
 # Introduction Section
 st.title("Sketch2Diagram")
@@ -14,7 +21,7 @@ st.title("Sketch2Diagram")
 st.write("This is a runnable demo of ImgTikZ model introduced in the Sketch2Diagram paper.")
 st.write("Please refer to the [original paper](https://openreview.net/pdf?id=KvaDHPhhir) for more details.")
 st.write("The model is trained to convert sketches into TikZ code, which can be used to generate vectorized diagrams.")
-st.write(f"Inference Strategy: {inference_strat}")
 # User Input Section
 st.subheader("Upload your sketch")
@@ -35,12 +42,9 @@ if input_file is not None:
     st.image(input_file, caption="Uploaded Sketch")
     generate_command = st.button("Generate TikZ Code")
 if generate_command:
-    model = get_model()
-    image = Image.open(input_file)
     with st.spinner("Generating TikZ code..."):
-        output = model(image)
-    tikz_code = output[0]['generated_text']
-    st.subheader("Generated TikZ Code")
-    st.code(tikz_code, language='latex')

 import streamlit as st
+from qwen2_inference import run_inference
+# Generation settings collected from the sidebar and passed to run_inference.
+args = {}
 # Sidebar Setup
 st.sidebar.title("Model Configuration")
+model_name = st.sidebar.selectbox("Model Name", ['Itsumi-st/Imgtikz_Qwen2vl', 'Qwen/Qwen2-VL-7B-Instruct'])
+args['inference_strat'] = st.sidebar.selectbox("Inference Strategy", ["Iterative", "Multi-candidate"],
+                                            help="Choose the inference strategy for the model. Iterative generates one candidate at a time until an output compiles, while Multi-candidate generates multiple candidates in parallel.")
+args['max_length'] = st.sidebar.slider("Max Length", 1, 5096, 2048, help="Maximum length of the generated output. The model will generate text up to this length.")
+args['seed'] = st.sidebar.number_input("Seed", min_value=0, value=42, step=1)
+args['top_p'] = st.sidebar.slider("Top P", 0.0, 1.0, 1.0, step=0.01, help="Top P sampling parameter. The model will sample from the top P percentage of the probability distribution.")
+# This slider was previously labelled "Top P", duplicating the label above.
+args['temperature'] = st.sidebar.slider("Temperature", 0.0, 1.0, 0.6, step=0.01, help="Temperature parameter for sampling. Higher values result in more random outputs.")
+args['top_k'] = st.sidebar.slider("Top K", 0, 100, 50, step=1, help="Top K sampling parameter. The model will sample from the top K tokens with the highest probabilities.")
 # Introduction Section
 st.title("Sketch2Diagram")
 st.write("This is a runnable demo of ImgTikZ model introduced in the Sketch2Diagram paper.")
 st.write("Please refer to the [original paper](https://openreview.net/pdf?id=KvaDHPhhir) for more details.")
 st.write("The model is trained to convert sketches into TikZ code, which can be used to generate vectorized diagrams.")
 # User Input Section
 st.subheader("Upload your sketch")
     st.image(input_file, caption="Uploaded Sketch")
     generate_command = st.button("Generate TikZ Code")
+# Run model inference
 if generate_command:
     with st.spinner("Generating TikZ code..."):
+        output = run_inference(input_file, model_name, args)
+    # run_inference returns a list of decoded strings on success and a plain
+    # error string when the model could not be loaded.
+    if isinstance(output, str):
+        st.error(output)
+    else:
+        st.success("TikZ code generated successfully!")
+        for tikz_code in output:
+            st.code(tikz_code, language='latex')