import io

import gradio as gr
import numpy as np
import torch
from decord import cpu, VideoReader, bridge
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Model configuration
MODEL_PATH = "THUDM/cogvlm2-llama3-caption"
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# bfloat16 requires compute capability >= 8.0 (Ampere or newer); otherwise fall back to float16.
TORCH_TYPE = (
    torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
    else torch.float16
)
# Possible delay reasons for each manufacturing step
DELAY_REASONS = {
    "Step 1": ["No raw material available", "Person repatching the tire"],
    "Step 2": ["Person repatching the tire", "Lack of raw material"],
    "Step 3": ["Person repatching the tire", "Lack of raw material"],
    "Step 4": ["Person repatching the tire", "Lack of raw material"],
    "Step 5": ["Person repatching the tire", "Lack of raw material"],
    "Step 6": ["Person repatching the tire", "Lack of raw material"],
    "Step 7": ["Person repatching the tire", "Lack of raw material"],
    "Step 8": ["No person available to collect tire", "Person repatching the tire"],
}
def load_video(video_data, strategy='chat'):
    """Decode raw MP4 bytes and sample up to 24 frames at roughly one per second."""
    bridge.set_bridge('torch')
    num_frames = 24
    decord_vr = VideoReader(io.BytesIO(video_data), ctx=cpu(0))
    total_frames = len(decord_vr)
    # Start timestamp (in seconds) of every frame in the video.
    timestamps = [ts[0] for ts in decord_vr.get_frame_timestamp(np.arange(total_frames))]
    max_second = round(max(timestamps)) + 1
    # Pick the frame closest to each whole second, up to num_frames frames.
    frame_id_list = []
    for second in range(max_second):
        closest = min(timestamps, key=lambda x: abs(x - second))
        frame_id_list.append(timestamps.index(closest))
        if len(frame_id_list) >= num_frames:
            break
    video_tensor = decord_vr.get_batch(frame_id_list)
    # (T, H, W, C) -> (C, T, H, W), the layout the model expects.
    return video_tensor.permute(3, 0, 1, 2)
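# Illustrative usage only (the file name here is hypothetical; in the app the
# bytes come from the Gradio upload instead):
#
#     with open("sample.mp4", "rb") as f:
#         frames = load_video(f.read())  # tensor of shape (C, T, H, W)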
def load_model():
    """Load the CogVLM2 video captioning model with 4-bit NF4 quantization."""
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=TORCH_TYPE,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        torch_dtype=TORCH_TYPE,
        trust_remote_code=True,
        quantization_config=quantization_config,
        device_map="auto",
    ).eval()
    return model, tokenizer
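# Optional optimization, not part of the original flow: cache the loaded model
# so repeated requests do not re-download and re-quantize the weights. A minimal
# sketch, assuming a single-process Gradio app.
_CACHED_MODEL = None

def get_cached_model():
    """Return a process-wide (model, tokenizer) pair, loading it on first use."""
    global _CACHED_MODEL
    if _CACHED_MODEL is None:
        _CACHED_MODEL = load_model()
    return _CACHED_MODEL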
def predict(prompt, video_data, temperature, model, tokenizer):
    """Run the model on the sampled video frames and return the generated text."""
    video = load_video(video_data, strategy='chat')
    inputs = model.build_conversation_input_ids(
        tokenizer=tokenizer,
        query=prompt,
        images=[video],
        history=[],
        template_version='chat'
    )
    inputs = {
        'input_ids': inputs['input_ids'].unsqueeze(0).to(DEVICE),
        'token_type_ids': inputs['token_type_ids'].unsqueeze(0).to(DEVICE),
        'attention_mask': inputs['attention_mask'].unsqueeze(0).to(DEVICE),
        'images': [[inputs['images'][0].to(DEVICE).to(TORCH_TYPE)]],
    }
    gen_kwargs = {
        "max_new_tokens": 2048,
        "pad_token_id": 128002,  # reserved pad token id of the Llama 3 tokenizer
        "top_k": 1,
        "do_sample": False,  # greedy decoding; the sampling knobs below are effectively ignored
        "top_p": 0.1,
        "temperature": temperature,
    }
    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)
    # Strip the prompt tokens, keeping only the newly generated text.
    outputs = outputs[:, inputs['input_ids'].shape[1]:]
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def get_analysis_prompt(step_number, possible_reasons):
    """Build the analysis prompt for a step and its candidate delay reasons."""
    return f"""Analyze the video of {step_number} in the tire manufacturing process.
Possible delay reasons for this step are:
{', '.join(possible_reasons)}
Based on the video evidence, determine which of these reasons best explains the delay.
Please provide:
1. Your chosen reason from the list above
2. Specific visual evidence supporting this choice
3. A brief explanation of why the other reasons are less likely
Focus your analysis on visual cues that support your conclusion."""
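# For reference, get_analysis_prompt("Step 1", DELAY_REASONS["Step 1"]) begins:
#
#     Analyze the video of Step 1 in the tire manufacturing process.
#     Possible delay reasons for this step are:
#     No raw material available, Person repatching the tire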
def inference(video, step_number, selected_reason):
    """Analyze the uploaded video and explain the delay for the selected step."""
    if not video:
        return "Please upload a video first."
    try:
        # Reloaded on every click; see the optional caching sketch after load_model().
        model, tokenizer = load_model()
        # gr.Video hands the upload over as a file path, so read the raw bytes from disk.
        with open(video, 'rb') as f:
            video_data = f.read()
        # Build the prompt from all candidate reasons for the selected step;
        # the model chooses among them, so selected_reason is advisory only.
        possible_reasons = DELAY_REASONS[step_number]
        prompt = get_analysis_prompt(step_number, possible_reasons)
        temperature = 0.8
        return predict(prompt, video_data, temperature, model, tokenizer)
    except Exception as e:
        return f"An error occurred: {e}"
def update_reasons(step):
    """Refresh the delay-reason dropdown when the selected step changes."""
    return gr.Dropdown(choices=DELAY_REASONS[step], value=DELAY_REASONS[step][0])
# Gradio interface
def create_interface():
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                video = gr.Video(label="Upload Manufacturing Video", sources=["upload"])
                step_number = gr.Dropdown(
                    choices=list(DELAY_REASONS.keys()),
                    label="Manufacturing Step",
                    value="Step 1"
                )
                reason = gr.Dropdown(
                    choices=DELAY_REASONS["Step 1"],
                    label="Select Delay Reason",
                    value=DELAY_REASONS["Step 1"][0]
                )
                analyze_btn = gr.Button("Analyze Delay", variant="primary")
            with gr.Column():
                output = gr.Textbox(label="Analysis Result", lines=10)

        # Keep the reason dropdown in sync with the selected step.
        step_number.change(
            fn=update_reasons,
            inputs=[step_number],
            outputs=[reason]
        )

        # Run the analysis when the button is clicked.
        analyze_btn.click(
            fn=inference,
            inputs=[video, step_number, reason],
            outputs=[output]
        )

    return demo
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
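# To run locally (dependencies inferred from the imports above; assumes this
# file is saved as app.py, the usual Hugging Face Spaces entry point):
#
#     pip install torch transformers bitsandbytes gradio decord numpy
#     python app.py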