Spaces:

bigodel
/

hydra-classifier

Sleeping

App Files Files Community

João Pedro committed on Jan 27

Commit

6c6f2d5

1 Parent(s): b90d0b6

try to avoid streamlit re-running everything on ui changes

Browse files

Files changed (1) hide show

app.py +30 -15

app.py CHANGED Viewed

@@ -32,8 +32,13 @@ labels = [
 id2label = {i: label for i, label in enumerate(labels)}
 label2id = {v: k for k, v in id2label.items()}
-processor = LayoutLMv3Processor.from_pretrained("model/layoutlmv3/")
-model = LayoutLMv3ForSequenceClassification.from_pretrained("model/layoutlmv3/")
 st.title("Document Classification with LayoutLMv3")
@@ -41,15 +46,32 @@ uploaded_file = st.file_uploader(
     "Upload Document", type=["pdf", "jpg", "png"], accept_multiple_files=False
 )
 feedback_table = wandb.Table(columns=[
     'image', 'filetype', 'predicted_label', 'predicted_label_id',
     'correct_label', 'correct_label_id'
 ])
-if uploaded_file:
-    run = wandb.init(project='hydra-classifier', name='feedback-loop')
     if uploaded_file.type == "application/pdf":
         images = convert_from_bytes(uploaded_file.getvalue())
     else:
@@ -58,16 +80,7 @@ if uploaded_file:
     for i, image in enumerate(images):
         st.image(image, caption=f'Uploaded Image {i}', use_container_width=True)
-        print(f'Encoding image with index {i}')
-        encoding = processor(
-            image,
-            return_tensors="pt",
-            truncation=True,
-            max_length=512,
-        )
-        print(f'Predicting image with index {i}')
-        outputs = model(**encoding)
-        prediction = outputs.logits.argmax(-1)[0].item()
         st.write(f"Prediction: {id2label[prediction]}")
@@ -96,5 +109,7 @@ if uploaded_file:
                 st.success(f"Feedback for Image {i} submitted!")
     print(feedback_table)
     run.log({'feedback_table': feedback_table})
     run.finish()

 id2label = {i: label for i, label in enumerate(labels)}
 label2id = {v: k for k, v in id2label.items()}
+if 'model' not in st.session_state:
+    st.session_state.model = LayoutLMv3ForSequenceClassification.from_pretrained("model/layoutlmv3/")
+if 'processor' not in st.session_state:
+    st.session_state.processor = LayoutLMv3Processor.from_pretrained("model/layoutlmv3/")
+model = st.session_state.model
+processor = st.session_state.processor
 st.title("Document Classification with LayoutLMv3")
     "Upload Document", type=["pdf", "jpg", "png"], accept_multiple_files=False
 )
 feedback_table = wandb.Table(columns=[
     'image', 'filetype', 'predicted_label', 'predicted_label_id',
     'correct_label', 'correct_label_id'
 ])
+if 'wandb_run' not in st.session_data:
+    st.session_data.wandb_run = wandb.init(project='hydra-classifier', name='feedback-loop')
+@st.cache_data
+def classify_image(image):
+    print(f'Encoding image with index {i}')
+    encoding = processor(
+        image,
+        return_tensors="pt",
+        truncation=True,
+        max_length=512,
+    )
+    print(f'Predicting image with index {i}')
+    outputs = model(**encoding)
+    prediction = outputs.logits.argmax(-1)[0].item()
+    return prediction
+if uploaded_file:
     if uploaded_file.type == "application/pdf":
         images = convert_from_bytes(uploaded_file.getvalue())
     else:
     for i, image in enumerate(images):
         st.image(image, caption=f'Uploaded Image {i}', use_container_width=True)
+        prediction = classify_image(image)
         st.write(f"Prediction: {id2label[prediction]}")
                 st.success(f"Feedback for Image {i} submitted!")
     print(feedback_table)
+    run = st.session_data.wandb_run
     run.log({'feedback_table': feedback_table})
     run.finish()
+    st.session_data.wandb_run = None