donbr committed
Commit 38f9446 · 1 Parent(s): 1b211a2

monkey patch

Files changed (2)
  1. app.py +44 -24
  2. requirements.txt +1 -1
app.py CHANGED
@@ -7,13 +7,19 @@ from itertools import cycle
 
  import torch
  import gradio as gr
+ import spaces
  from urllib.parse import unquote
  from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
+ from transformers.cache_utils import DynamicCache
+
+ # Add get_max_length method to DynamicCache if it doesn't exist
+ # This is needed for compatibility with Phi-3.5 models
+ if not hasattr(DynamicCache, 'get_max_length'):
+     DynamicCache.get_max_length = lambda self: self.get_seq_length()
 
  from data import extract_leaves, split_document, handle_broken_output, clean_json_text, sync_empty_fields
  from examples import examples as input_examples
  from nuextract_logging import log_event
- import spaces
 
 
  MAX_INPUT_SIZE = 10_000
@@ -131,36 +137,50 @@ def sliding_window_prediction(template, text, model, tokenizer, window_size=4000
 
  ######
 
- # Load the model and tokenizer
+ # Model is loaded here but will be moved to CUDA only when needed with ZeroGPU
  model_name = "numind/NuExtract-v1.5"
  auth_token = os.environ.get("HF_TOKEN") or False
- model = AutoModelForCausalLM.from_pretrained(model_name,
+
+ # Load tokenizer in advance but not the model
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=auth_token)
+
+ # We define a function to load the model when needed
+ def load_model():
+     model = AutoModelForCausalLM.from_pretrained(model_name,
                                               trust_remote_code=True,
                                               torch_dtype=torch.bfloat16,
                                               device_map="auto", use_auth_token=auth_token)
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=auth_token)
- model.eval()
+     model.eval()
+     return model
 
- @spaces.GPU
+ @spaces.GPU(duration=300)
  def gradio_interface_function(template, text, is_example):
-     if len(tokenizer.tokenize(text)) > MAX_INPUT_SIZE:
-         yield "", "Input text too long for space. Download model to use unrestricted.", ""
-         return # End the function since there was an error
-
-     # Initialize the sliding window prediction process
-     prediction_generator = sliding_window_prediction(template, text, model, tokenizer, window_size=MAX_WINDOW_SIZE)
-
-     # Iterate over the generator to return values at each step
-     for progress, full_pred, html_content in prediction_generator:
-         # yield gr.update(value=chunk_info), gr.update(value=progress), gr.update(value=full_pred), gr.update(value=html_content)
-         yield progress, full_pred, html_content
-
-     # Conditionally log event if not an example and logging is configured
-     if not is_example:
-         try:
-             log_event(text, template, full_pred)
-         except Exception as e:
-             print(f"Warning: Could not log event: {e}", file=sys.stderr)
+     try:
+         if len(tokenizer.tokenize(text)) > MAX_INPUT_SIZE:
+             yield "", "Input text too long for space. Download model to use unrestricted.", ""
+             return # End the function since there was an error
+
+         # Load the model when needed
+         model = load_model()
+
+         # Initialize the sliding window prediction process
+         prediction_generator = sliding_window_prediction(template, text, model, tokenizer, window_size=MAX_WINDOW_SIZE)
+
+         # Iterate over the generator to return values at each step
+         for progress, full_pred, html_content in prediction_generator:
+             # yield gr.update(value=chunk_info), gr.update(value=progress), gr.update(value=full_pred), gr.update(value=html_content)
+             yield progress, full_pred, html_content
+
+         # Conditionally log event if not an example and logging is configured
+         if not is_example:
+             try:
+                 log_event(text, template, full_pred)
+             except Exception as e:
+                 print(f"Warning: Could not log event: {e}", file=sys.stderr)
+     except Exception as e:
+         error_message = f"Error processing request: {str(e)}"
+         print(error_message, file=sys.stderr)
+         yield "", error_message, ""
 
 
  # Set up the Gradio interface
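
Note: the DynamicCache shim added at the top of app.py can be exercised on its own. The snippet below is an illustration, not part of the commit; it assumes a transformers release whose DynamicCache exposes update() and get_seq_length() but no longer defines get_max_length(). On releases that still ship get_max_length(), the patch is a no-op.

import torch
from transformers.cache_utils import DynamicCache

# Same shim as in app.py: restore get_max_length() when the installed
# transformers version has dropped it from DynamicCache.
if not hasattr(DynamicCache, 'get_max_length'):
    DynamicCache.get_max_length = lambda self: self.get_seq_length()

# Simulate one decoding step on layer 0, shape (batch, heads, seq_len, head_dim).
cache = DynamicCache()
cache.update(torch.zeros(1, 2, 5, 8), torch.zeros(1, 2, 5, 8), layer_idx=0)

print(cache.get_seq_length())  # 5
print(cache.get_max_length())  # 5 with the shim; None on versions that still define it
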
requirements.txt CHANGED
@@ -1,4 +1,4 @@
  transformers
  torch
  accelerate
- spaces
+ spaces>=0.1.0
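
The spaces dependency pinned above provides the @spaces.GPU decorator used in app.py. As a rough, hypothetical sketch of the ZeroGPU pattern this commit adopts (tokenizer at import time, model load deferred into the GPU-allocated call), not the Space's actual handler:

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "numind/NuExtract-v1.5"
# Tokenizer loading is cheap and CPU-only, so it can stay at import time.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def load_model():
    # Deferred model load; on ZeroGPU this runs only once a GPU is attached to the call.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    model.eval()
    return model

@spaces.GPU(duration=300)  # request a GPU for up to 300 seconds per call
def extract(text):
    # Hypothetical minimal handler, standing in for gradio_interface_function.
    model = load_model()
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(output[0], skip_special_tokens=True)
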