import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from pydantic import BaseModel
import spaces  # ZeroGPU helper for Hugging Face Spaces (provides the @spaces.GPU decorator)

device = 'cuda'

# Load the LLM and tokenizer
torch.random.manual_seed(0)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map=device,
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Pydantic class for output validation
class VideoAnalysis(BaseModel):
    indoor: int
    hands_free: int
    screen_interaction: int
    standing: int

@spaces.GPU(duration=100)
def process_description(description):
    # Construct a prompt for your LLM based on the video description
    prompt = f"""
    You are a helpful AI assistant. Analyze the following video description and answer the questions with 0 for True and 1 for False: 
    
    Video Description: {description}

    Questions:
    - Is the scene indoors? 
    - Are the subject's hands free?
    - Is there screen interaction by the subject?
    - Is the subject standing? 
    
    Provide your answers in JSON format like this: 
    {{"indoor": 0, "hands_free": 1, "screen_interaction": 0, "standing": 1}} 
    """

    generation_args = {
        "max_new_tokens": 100,  # Adjust as needed
        "return_full_text": False,
        "temperature": 0.0,  # Ignored when do_sample=False; decoding is greedy and deterministic
        "do_sample": False,
    }

    output = pipe(prompt, **generation_args)
    json_text = output[0]['generated_text']  # The model is expected to emit only the JSON object
    
    try:
        # Attempt to parse and validate the JSON response
        analysis_result = VideoAnalysis.model_validate_json(json_text)
        return analysis_result.model_dump_json()  # Return as valid JSON
    except Exception as e:
        print(f"Error processing LLM output: {e}")
        return {"error": "Could not process the video description."}