arjunanand13 committed on
Commit
79e78be
·
verified ·
1 Parent(s): 1dedabd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -25
app.py CHANGED
@@ -6,12 +6,12 @@ from decord import cpu, VideoReader, bridge
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from transformers import BitsAndBytesConfig
8
 
9
- # Model Configuration
10
  MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
11
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
12
  TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
13
 
14
- # Define delay reasons for each step
15
  DELAY_REASONS = {
16
  "Step 1": ["No raw material available", "Person repatching the tire"],
17
  "Step 2": ["Person repatching the tire", "Lack of raw material"],
@@ -25,9 +25,13 @@ DELAY_REASONS = {
25
 
26
  def load_video(video_data, strategy='chat'):
27
  bridge.set_bridge('torch')
28
- mp4_stream = video_data
29
  num_frames = 24
30
- decord_vr = VideoReader(io.BytesIO(mp4_stream), ctx=cpu(0))
 
 
 
 
 
31
 
32
  frame_id_list = []
33
  total_frames = len(decord_vr)
@@ -100,44 +104,41 @@ def predict(prompt, video_data, temperature, model, tokenizer):
100
 
101
  def get_analysis_prompt(step_number, possible_reasons):
102
  return f"""You are an AI expert system specialized in analyzing manufacturing processes and identifying production delays in tire manufacturing. Your role is to accurately classify delay reasons based on visual evidence from production line footage.
103
-
104
  Task Context:
105
  You are analyzing video footage from Step {step_number} of a tire manufacturing process where a delay has been detected. Your task is to determine the most likely cause of the delay from the following possible reasons:
106
  {', '.join(possible_reasons)}
107
-
108
  Required Analysis:
109
  1. Carefully observe the video for visual cues indicating production interruption
110
  2. Compare observed evidence against each possible delay reason
111
  3. Select the most likely reason based on visual evidence
112
-
113
  Please provide your analysis in the following format:
114
  1. Selected Reason: [State the most likely reason from the given options]
115
  2. Visual Evidence: [Describe specific visual cues that support your selection]
116
  3. Reasoning: [Explain why this reason best matches the observed evidence]
117
  4. Alternative Analysis: [Brief explanation of why other possible reasons are less likely]
118
-
119
  Important: Base your analysis solely on visual evidence from the video. Focus on concrete, observable details rather than assumptions."""
120
 
 
121
  model, tokenizer = load_model()
122
 
123
  def inference(video, step_number):
124
- if not video:
125
- return "Please upload a video first."
126
-
127
- with open(video_path, "rb") as f:
128
- video_data = f.read()
129
 
130
- # Get possible reasons for the selected step
131
- possible_reasons = DELAY_REASONS[step_number]
132
-
133
- # Generate the analysis prompt
134
- prompt = get_analysis_prompt(step_number, possible_reasons)
135
-
136
- # Get model prediction
137
- temperature = 0.8
138
- response = predict(prompt, video_data, temperature, model, tokenizer)
139
-
140
- return response
 
 
141
 
142
  # Gradio Interface
143
  def create_interface():
@@ -172,4 +173,4 @@ def create_interface():
172
 
173
  if __name__ == "__main__":
174
  demo = create_interface()
175
- demo.launch()
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from transformers import BitsAndBytesConfig
8
 
9
+
10
  MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
11
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
12
  TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
13
 
14
+
15
  DELAY_REASONS = {
16
  "Step 1": ["No raw material available", "Person repatching the tire"],
17
  "Step 2": ["Person repatching the tire", "Lack of raw material"],
 
25
 
26
  def load_video(video_data, strategy='chat'):
27
  bridge.set_bridge('torch')
 
28
  num_frames = 24
29
+
30
+
31
+ if isinstance(video_data, str):
32
+ decord_vr = VideoReader(video_data, ctx=cpu(0))
33
+ else:
34
+ decord_vr = VideoReader(io.BytesIO(video_data), ctx=cpu(0))
35
 
36
  frame_id_list = []
37
  total_frames = len(decord_vr)
 
104
 
105
  def get_analysis_prompt(step_number, possible_reasons):
106
  return f"""You are an AI expert system specialized in analyzing manufacturing processes and identifying production delays in tire manufacturing. Your role is to accurately classify delay reasons based on visual evidence from production line footage.
 
107
  Task Context:
108
  You are analyzing video footage from Step {step_number} of a tire manufacturing process where a delay has been detected. Your task is to determine the most likely cause of the delay from the following possible reasons:
109
  {', '.join(possible_reasons)}
 
110
  Required Analysis:
111
  1. Carefully observe the video for visual cues indicating production interruption
112
  2. Compare observed evidence against each possible delay reason
113
  3. Select the most likely reason based on visual evidence
 
114
  Please provide your analysis in the following format:
115
  1. Selected Reason: [State the most likely reason from the given options]
116
  2. Visual Evidence: [Describe specific visual cues that support your selection]
117
  3. Reasoning: [Explain why this reason best matches the observed evidence]
118
  4. Alternative Analysis: [Brief explanation of why other possible reasons are less likely]
 
119
  Important: Base your analysis solely on visual evidence from the video. Focus on concrete, observable details rather than assumptions."""
120
 
121
+ # Load model globally
122
  model, tokenizer = load_model()
123
 
124
  def inference(video, step_number):
125
+ try:
126
+ if not video:
127
+ return "Please upload a video first."
 
 
128
 
129
+ # Get possible reasons for the selected step
130
+ possible_reasons = DELAY_REASONS[step_number]
131
+
132
+ # Generate the analysis prompt
133
+ prompt = get_analysis_prompt(step_number, possible_reasons)
134
+
135
+ # Get model prediction
136
+ temperature = 0.8
137
+ response = predict(prompt, video, temperature, model, tokenizer)
138
+
139
+ return response
140
+ except Exception as e:
141
+ return f"An error occurred during analysis: {str(e)}"
142
 
143
  # Gradio Interface
144
  def create_interface():
 
173
 
174
  if __name__ == "__main__":
175
  demo = create_interface()
176
+ demo.launch(share=True) # Added share=True to create a public link