youssef committed on
Commit
bc47c2c
·
1 Parent(s): b75046f

feat: add better prompt

Browse files
Files changed (1) hide show
  1. src/video_processor/processor.py +24 -12
src/video_processor/processor.py CHANGED
@@ -34,14 +34,25 @@ class VideoAnalyzer:
34
  def process_video(self, video_path: str, frame_interval: int = 30) -> List[Dict]:
35
  logger.info(f"Processing video: {video_path} with frame_interval={frame_interval}")
36
  try:
37
- # Create message for model
38
- messages = [{
39
- "role": "user",
40
- "content": [
41
- {"type": "video", "path": video_path},
42
- {"type": "text", "text": "Describe this video in detail - with all the timestamps and the actions happening in the video. I should be able to understand the video by reading the description, and search for it later."}
43
- ]
44
- }]
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  # Process video using chat template
47
  inputs = self.processor.apply_chat_template(
@@ -52,11 +63,12 @@ class VideoAnalyzer:
52
  return_tensors="pt"
53
  ).to(self.model.device)
54
 
55
- # Generate description
56
  generated_ids = self.model.generate(
57
  **inputs,
58
- do_sample=False,
59
- max_new_tokens=100
 
60
  )
61
  description = self.processor.batch_decode(
62
  generated_ids,
@@ -64,7 +76,7 @@ class VideoAnalyzer:
64
  )[0]
65
 
66
  return [{
67
- "description": description
68
  }]
69
 
70
  except Exception as e:
 
34
  def process_video(self, video_path: str, frame_interval: int = 30) -> List[Dict]:
35
  logger.info(f"Processing video: {video_path} with frame_interval={frame_interval}")
36
  try:
37
+ # Create message for model with detailed system prompt
38
+ messages = [
39
+ {
40
+ "role": "system",
41
+ "content": [
42
+ {
43
+ "type": "text",
44
+ "text": "You are a detailed video analysis assistant that can understand videos. Your task is to provide comprehensive descriptions including all events, actions, and important details with their timestamps. Focus on being specific and thorough."
45
+ }
46
+ ]
47
+ },
48
+ {
49
+ "role": "user",
50
+ "content": [
51
+ {"type": "video", "path": video_path},
52
+ {"type": "text", "text": "Please provide a detailed analysis of this video. Include:\n1. All significant actions and events\n2. Temporal information and timestamps\n3. Important visual details and context\n4. Any text or speech content if present\n5. Scene transitions and changes\nBe thorough and specific so the description can be used for detailed searching later."}
53
+ ]
54
+ }
55
+ ]
56
 
57
  # Process video using chat template
58
  inputs = self.processor.apply_chat_template(
 
63
  return_tensors="pt"
64
  ).to(self.model.device)
65
 
66
+ # Generate description with increased token limit
67
  generated_ids = self.model.generate(
68
  **inputs,
69
+ do_sample=True,
70
+ temperature=0.7,
71
+ max_new_tokens=512 # Increased from 100 to get more detailed descriptions
72
  )
73
  description = self.processor.batch_decode(
74
  generated_ids,
 
76
  )[0]
77
 
78
  return [{
79
+ "description": description.split("Assistant: ")[-1] # Remove assistant prefix if present
80
  }]
81
 
82
  except Exception as e: