VishalD1234 committed on
Commit
72b5ab6
·
verified ·
1 Parent(s): c3e8ea5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -18
app.py CHANGED
@@ -6,10 +6,22 @@ from decord import cpu, VideoReader, bridge
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from transformers import BitsAndBytesConfig
8
 
9
- MODEL_PATH = "THUDM/cogvlm2-video-llama3-chat"
10
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
11
  TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
12
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def get_step_info(step_number):
14
  """Returns detailed information about a manufacturing step."""
15
  step_details = {
@@ -124,6 +136,8 @@ def get_step_info(step_number):
124
 
125
  return step_details.get(step_number, {"Error": "Invalid step number. Please provide a valid step number."})
126
 
 
 
127
  def load_video(video_data, strategy='chat'):
128
  """Loads and processes video data into a format suitable for model input."""
129
  bridge.set_bridge('torch')
@@ -171,10 +185,9 @@ def load_model():
171
  return model, tokenizer
172
 
173
  def predict(prompt, video_data, temperature, model, tokenizer):
174
- """Generates predictions based on the video and textual prompt, focusing on one delay reason."""
175
  video = load_video(video_data, strategy='chat')
176
 
177
- # Prepare the inputs for the model
178
  inputs = model.build_conversation_input_ids(
179
  tokenizer=tokenizer,
180
  query=prompt,
@@ -204,10 +217,7 @@ def predict(prompt, video_data, temperature, model, tokenizer):
204
  outputs = outputs[:, inputs['input_ids'].shape[1]:]
205
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
206
 
207
- # Extract the output in the desired format (return just one delay reason)
208
- # We can assume the model will return one reason in the format "Output: <delay reason>"
209
- return response.strip().split('\n')[-1] # Assuming the reason is on the last line
210
-
211
 
212
  def get_analysis_prompt(step_number):
213
  """Constructs the prompt for analyzing delay reasons based on the selected step."""
@@ -218,25 +228,42 @@ def get_analysis_prompt(step_number):
218
 
219
  step_name = step_info["Name"]
220
  standard_time = step_info["Standard Time"]
221
- potential_delay_reasons = step_info.get("Potential_Delay_Reasons", [])
222
 
223
- # Constructing the prompt dynamically with potential delay reasons
224
- potential_reasons_text = "\n ".join(potential_delay_reasons)
225
-
226
  return f"""
227
- You are an AI expert system specialized in analyzing manufacturing processes and identifying production delays in tire manufacturing. Your role is to accurately classify the correct delay reason based on visual evidence from production line footage.
228
  Task Context:
229
  You are analyzing video footage from Step {step_number} of a tire manufacturing process where a delay has been detected. The step is called {step_name}, and its standard time is {standard_time}.
230
  Required Analysis:
231
- Carefully observe the video for visual cues indicating production interruption. The possible delay reasons are:
232
- {potential_reasons_text}
233
- Please provide only the most likely delay reason in the following format:
 
 
 
 
 
234
 
235
- Output:
236
- <One specific delay reason>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  """
238
 
239
 
 
240
  model, tokenizer = load_model()
241
 
242
  def inference(video, step_number):
@@ -296,4 +323,4 @@ def create_interface():
296
 
297
  if __name__ == "__main__":
298
  demo = create_interface()
299
- demo.queue().launch(share=True)
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  from transformers import BitsAndBytesConfig
8
 
9
+ MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
10
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
11
  TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
12
 
13
+ # Delay Reasons for Each Manufacturing Step
14
+ DELAY_REASONS = {
15
+ "Step 1": ["Delay in Bead Insertion", "Lack of raw material"],
16
+ "Step 2": ["Inner Liner Adjustment by Technician", "Person rebuilding defective Tire Sections"],
17
+ "Step 3": ["Manual Adjustment in Ply1 apply", "Technician repairing defective Tire Sections"],
18
+ "Step 4": ["Delay in Bead set", "Lack of raw material"],
19
+ "Step 5": ["Delay in Turnup", "Lack of raw material"],
20
+ "Step 6": ["Person Repairing sidewall", "Person rebuilding defective Tire Sections"],
21
+ "Step 7": ["Delay in sidewall stitching", "Lack of raw material"],
22
+ "Step 8": ["No person available to load Carcass", "No person available to collect tire"]
23
+ }
24
+
25
  def get_step_info(step_number):
26
  """Returns detailed information about a manufacturing step."""
27
  step_details = {
 
136
 
137
  return step_details.get(step_number, {"Error": "Invalid step number. Please provide a valid step number."})
138
 
139
+
140
+
141
  def load_video(video_data, strategy='chat'):
142
  """Loads and processes video data into a format suitable for model input."""
143
  bridge.set_bridge('torch')
 
185
  return model, tokenizer
186
 
187
  def predict(prompt, video_data, temperature, model, tokenizer):
188
+ """Generates predictions based on the video and textual prompt."""
189
  video = load_video(video_data, strategy='chat')
190
 
 
191
  inputs = model.build_conversation_input_ids(
192
  tokenizer=tokenizer,
193
  query=prompt,
 
217
  outputs = outputs[:, inputs['input_ids'].shape[1]:]
218
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
219
 
220
+ return response
 
 
 
221
 
222
  def get_analysis_prompt(step_number):
223
  """Constructs the prompt for analyzing delay reasons based on the selected step."""
 
228
 
229
  step_name = step_info["Name"]
230
  standard_time = step_info["Standard Time"]
231
+ analysis = step_info["Analysis"]
232
 
 
 
 
233
  return f"""
234
+ You are an AI expert system specialized in analyzing manufacturing processes and identifying production delays in tire manufacturing. Your role is to accurately classify delay reasons based on visual evidence from production line footage.
235
  Task Context:
236
  You are analyzing video footage from Step {step_number} of a tire manufacturing process where a delay has been detected. The step is called {step_name}, and its standard time is {standard_time}.
237
  Required Analysis:
238
+ Carefully observe the video for visual cues indicating production interruption.
239
+ - If no person is visible in any of the frames, the reason probably might be due to their absence.
240
+ - If a person is visible in the video and is observed touching and modifying the layers of the tire, it indicates an issue with tire patching, and the person might be repairing it.
241
+ - Compare observed evidence against the following possible delay reasons:
242
+ - {analysis}
243
+ Following are the subactivities needs to happen in this step.
244
+
245
+ {get_step_info(step_number)}
246
 
247
+ Please provide your output in the following format:
248
 + Output_Examples = {{
248
 + ["Delay in Bead Insertion", "Lack of raw material"],
249
 + ["Inner Liner Adjustment by Technician", "Person rebuilding defective Tire Sections"],
250
 + ["Manual Adjustment in Ply1 Apply", "Technician repairing defective Tire Sections"],
251
 + ["Delay in Bead Set", "Lack of raw material"],
252
 + ["Delay in Turnup", "Lack of raw material"],
253
 + ["Person Repairing Sidewall", "Person rebuilding defective Tire Sections"],
254
 + ["Delay in Sidewall Stitching", "Lack of raw material"],
255
 + ["No person available to load Carcass", "No person available to collect tire"]
256
 + }}
258
+ 1. **Selected Reason:** [State the most likely reason from the given options]
259
+ 2. **Visual Evidence:** [Describe specific visual cues that support your selection]
260
+ 3. **Reasoning:** [Explain why this reason best matches the observed evidence]
261
+ 4. **Alternative Analysis:** [Brief explanation of why other possible reasons are less likely]
262
+ Important: Base your analysis solely on visual evidence from the video. Focus on concrete, observable details rather than assumptions. Clearly state if no person or specific activity is observed.
263
  """
264
 
265
 
266
+
267
  model, tokenizer = load_model()
268
 
269
  def inference(video, step_number):
 
323
 
324
  if __name__ == "__main__":
325
  demo = create_interface()
326
+ demo.queue().launch(share=True)