# Hugging Face Space: extract structured activity flags from a description with Phi-3-mini.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import json
import warnings
from pydantic import BaseModel
from typing import Dict
import spaces
# Target device for model weights; hard-coded for the Spaces GPU runtime.
device = "cuda"
# Ignore warnings (library deprecation noise clutters the Spaces logs)
warnings.filterwarnings(action='ignore')
# Set random seed so sampling (do_sample=True below) is reproducible
torch.random.manual_seed(0)
# Define model path and generation arguments
model_path = "microsoft/Phi-3-mini-4k-instruct"
generation_args = {
    "max_new_tokens": 50,       # only a small JSON object is expected
    "return_full_text": False,  # return just the completion, not the echoed prompt
    "temperature": 0.1,         # near-greedy sampling for stable JSON output
    "do_sample": True
}
# Load the model and pipeline once and keep it in memory
def load_model_pipeline(model_path: str):
    """Load and return a text-generation pipeline for *model_path*, caching it.

    The pipeline is memoized on the function object so the (expensive) model
    load happens once per path. The cache is keyed by ``model_path``: the
    original version cached a single pipeline regardless of path, so a later
    call with a different path silently returned the wrong model.

    Args:
        model_path: Hugging Face model identifier or local checkpoint path.

    Returns:
        A ``transformers`` text-generation pipeline for the requested model.
    """
    cache = getattr(load_model_pipeline, "_cache", None)
    if cache is None:
        cache = load_model_pipeline._cache = {}
    if model_path not in cache:
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map=device,
            torch_dtype="auto",
            # NOTE(review): trust_remote_code executes code shipped with the
            # checkpoint — acceptable only for trusted model sources.
            trust_remote_code=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        cache[model_path] = pipeline("text-generation", model=model, tokenizer=tokenizer)
    return cache[model_path]
# Initialize the pipeline and keep it in memory
pipe = load_model_pipeline(model_path)  # module-level singleton; model is loaded once at import time
# Generate logic from LLM output
@spaces.GPU(duration=50)
def generate_logic(llm_output: str) -> str:
    """Ask the LLM to turn a free-text activity description into a JSON string.

    Builds a prompt describing four binary fields, runs the shared ``pipe``
    pipeline, and extracts the JSON object substring from the completion.

    Args:
        llm_output: Free-text description of the observed activity.

    Returns:
        The raw JSON substring (first ``{`` through last ``}``) from the
        model's completion.

    Raises:
        ValueError: If the completion contains no well-placed JSON object.
    """
    prompt = f"""
Provide the response in json string for the below keys and context based on the description: '{llm_output}'.
Screen.interaction_yes: This field indicates whether there was an interaction of the person with a screen during the activity. A value of 1 means there was screen interaction (Yes), and a value of 0 means there was no screen interaction (No).
Hands.free: This field indicates whether the person's hands were free during the activity. A value of 1 means the person was not holding anything (Yes), indicating free hands. A value of 0 means the person was holding something (No), indicating the hands were not free.
Indoors: This field indicates whether the activity took place indoors. A value of 1 means the activity occurred inside a building or enclosed space (Yes), and a value of 0 means the activity took place outside (No).
Standing: This field indicates whether the person was standing during the activity. A value of 1 means the person was standing (Yes), and a value of 0 means the person was not standing (No).
"""
    messages = [
        {"role": "system", "content": "Please answer questions just based on this information: " + llm_output},
        {"role": "user", "content": prompt},
    ]
    response = pipe(messages, **generation_args)
    generated_text = response[0]['generated_text']
    # Extract the JSON object: first '{' through last '}'. Validate the
    # markers explicitly — the original relied on find() returning -1 to
    # accidentally produce an empty slice, and produced garbage when a '}'
    # preceded the first '{'.
    start_index = generated_text.find('{')
    end_index = generated_text.rfind('}')
    if start_index == -1 or end_index < start_index:
        raise ValueError("Generated logic is empty or invalid JSON")
    json_str = generated_text[start_index:end_index + 1]
    # Log the generated JSON string for debugging
    print(f"Generated JSON: {json_str}")
    if not json_str.strip():
        raise ValueError("Generated logic is empty or invalid JSON")
    return json_str
# Pydantic model for structured output
class VideoAnalysis(BaseModel):
    """Structured binary flags describing an activity, parsed from LLM JSON.

    Each field is 1 (yes) or 0 (no); absent keys default to 0.
    """

    # 1 if the person interacted with a screen during the activity.
    screen_interaction_yes: int
    # 1 if the person's hands were free (not holding anything).
    hands_free: int
    # 1 if the activity took place indoors.
    indoors: int
    # 1 if the person was standing.
    standing: int

    @classmethod
    def from_llm_output(cls, generated_logic: str) -> 'VideoAnalysis':
        """Build an instance from the JSON string emitted by the LLM.

        Raises:
            ValueError: If *generated_logic* is not valid JSON.
        """
        try:
            parsed = json.loads(generated_logic)
        except json.JSONDecodeError as err:
            raise ValueError(f"Error decoding JSON: {err}") from err
        value_of = parsed.get  # dotted JSON keys map onto snake_case fields
        return cls(
            screen_interaction_yes=value_of("Screen.interaction_yes", 0),
            hands_free=value_of("Hands.free", 0),
            indoors=value_of("Indoors", 0),
            standing=value_of("Standing", 0),
        )
# Main function to process LLM output
def process_description(description: str) -> Dict:
    """Run the full pipeline: free-text description -> LLM JSON -> plain dict.

    Args:
        description: Free-text description of the observed activity.

    Returns:
        A dict with the four binary activity flags.
    """
    logic_json = generate_logic(description)
    analysis = VideoAnalysis.from_llm_output(logic_json)
    # NOTE(review): .dict() is the pydantic v1 API (model_dump() in v2) —
    # kept as-is since the installed pydantic version isn't visible here.
    return analysis.dict()