Spaces:

ManishThota
/

GSoC-Super-Rapid-Annotator

Runtime error

ManishThota committed on Aug 21, 2024

Commit

5cbf359

verified ·

1 Parent(s): 962cccf

Update src/text_processor.py

Files changed (1) hide show

src/text_processor.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import json
@@ -22,22 +23,25 @@ generation_args = {
     "do_sample": True
 }
-# Load the model and pipeline once
 def load_model_pipeline(model_path: str):
-    model = AutoModelForCausalLM.from_pretrained(
-        model_path,
-        device_map=device,
-        torch_dtype="auto",
-        trust_remote_code=True,
-    )
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-    return pipeline("text-generation", model=model, tokenizer=tokenizer)
 pipe = load_model_pipeline(model_path)
 # Generate logic from LLM output
 @spaces.GPU(duration=50)
-def generate_logic(llm_output: str, pipeline) -> str:
     prompt = f"""
     Provide the response in json string for the below keys and context based on the description: '{llm_output}'.
@@ -52,7 +56,7 @@ def generate_logic(llm_output: str, pipeline) -> str:
         {"role": "user", "content": prompt},
     ]
-    response = pipeline(messages, **generation_args)
     generated_text = response[0]['generated_text']
     # Extract JSON from the generated text
@@ -78,7 +82,7 @@ class VideoAnalysis(BaseModel):
         )
 # Main function to process LLM output
-def process_llm_output(description: str) -> Dict:
-    generated_logic = generate_logic(description, pipe)
     structured_output = VideoAnalysis.from_llm_output(generated_logic)
     return structured_output.dict()

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import json
     "do_sample": True
 }
+# Load the model and pipeline once and keep it in memory
 def load_model_pipeline(model_path: str):
+    if not hasattr(load_model_pipeline, "pipe"):
+        model = AutoModelForCausalLM.from_pretrained(
+            model_path,
+            device_map=device,
+            torch_dtype="auto",
+            trust_remote_code=True,
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_path)
+        load_model_pipeline.pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+    return load_model_pipeline.pipe
+# Initialize the pipeline and keep it in memory
 pipe = load_model_pipeline(model_path)
 # Generate logic from LLM output
 @spaces.GPU(duration=50)
+def generate_logic(llm_output: str) -> str:
     prompt = f"""
     Provide the response in json string for the below keys and context based on the description: '{llm_output}'.
         {"role": "user", "content": prompt},
     ]
+    response = pipe(messages, **generation_args)
     generated_text = response[0]['generated_text']
     # Extract JSON from the generated text
         )
 # Main function to process LLM output
+def process_description(description: str) -> Dict:
+    generated_logic = generate_logic(description)
     structured_output = VideoAnalysis.from_llm_output(generated_logic)
     return structured_output.dict()