Spaces:
Runtime error
Runtime error
File size: 1,952 Bytes
6ee5519 f4de9a0 6ee5519 f4de9a0 6ee5519 f4de9a0 6ee5519 f4de9a0 6ee5519 f4de9a0 6ee5519 f4de9a0 6ee5519 f4de9a0 6ee5519 f4de9a0 6ee5519 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import json

import spaces
import torch
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Target device for the model. Assumes a CUDA GPU is present — on HF Spaces
# the @spaces.GPU decorator below provides one per call (ZeroGPU).
device = 'cuda'

# Load your LLM model and tokenizer
torch.random.manual_seed(0)  # fixed seed for reproducible generation
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map=device,
    torch_dtype="auto",  # let the checkpoint choose its dtype (fp16/bf16 on GPU)
    trust_remote_code=True,  # Phi-3 ships custom modeling code with the checkpoint
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

# Shared text-generation pipeline used by process_description below.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
# Pydantic class for output validation
class VideoAnalysis(BaseModel):
    # Each field is a 0/1 integer flag parsed from the LLM's JSON answer.
    # NOTE(review): the prompt asks for "0 for True and 1 for False", which
    # inverts the usual convention — confirm which encoding downstream expects.
    indoor: int
    hands_free: int
    screen_interaction: int
    standing: int
@spaces.GPU(duration=100)
def process_description(description):
    """Analyze a video description with the LLM and return a JSON string.

    Asks the model four yes/no questions about the scene and validates the
    reply against the ``VideoAnalysis`` schema.

    Args:
        description: Free-text description of a video.

    Returns:
        A JSON string — either the validated ``VideoAnalysis`` payload, or
        ``{"error": ...}`` if the model reply could not be parsed. (Bug fix:
        the error path previously returned a dict while the success path
        returned a string.)
    """
    # Construct a prompt for your LLM based on the video description.
    # NOTE(review): "0 for True and 1 for False" inverts the usual 1=True
    # convention — confirm the intended encoding before changing the wording.
    prompt = f"""
You are a helpful AI assistant. Analyze the following video description and answer the questions with 0 for True and 1 for False:
Video Description: {description}
Questions:
- Is the scene indoors?
- Are the subject's hands free?
- Is there screen interaction by the subject?
- Is the subject standing?
Provide your answers in JSON format like this:
{{"indoor": 0, "hands_free": 1, "screen_interaction": 0, "standing": 1}}
"""
    generation_args = {
        "max_new_tokens": 100,  # Adjust as needed
        "return_full_text": False,
        # temperature is ignored when do_sample=False; kept for clarity that
        # generation is intended to be greedy/deterministic.
        "temperature": 0.0,
        "do_sample": False,
    }

    output = pipe(prompt, **generation_args)
    raw_text = output[0]['generated_text']

    # Robustness fix: chat models often wrap the JSON in extra prose. Extract
    # the outermost {...} span instead of validating the raw reply verbatim.
    start = raw_text.find('{')
    end = raw_text.rfind('}')
    json_text = raw_text[start:end + 1] if start != -1 and end > start else raw_text

    try:
        # Attempt to parse and validate the JSON response
        analysis_result = VideoAnalysis.model_validate_json(json_text)
        return analysis_result.model_dump_json()  # Return as valid JSON
    except Exception as e:
        print(f"Error processing LLM output: {e}")
        # Serialize the error so callers always receive a JSON string.
        return json.dumps({"error": "Could not process the video description."})