Spaces:

ignitariumcloud
/

TBMOPS_GENAI

Runtime error

App Files Files Community

TBMOPS_GENAI / app.py

arjunanand13

Update app.py

79e78be verified 7 months ago

raw

history blame

6.69 kB

	import gradio as gr
	import io
	import numpy as np
	import torch
	from decord import cpu, VideoReader, bridge
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from transformers import BitsAndBytesConfig


	# Hugging Face Hub identifier of the checkpoint this app loads.
	MODEL_PATH = "THUDM/cogvlm2-llama3-caption"

	# Probe CUDA once; both settings below are derived from this answer.
	_CUDA_AVAILABLE = torch.cuda.is_available()

	# Device string that input tensors are moved to before generation.
	DEVICE = 'cuda' if _CUDA_AVAILABLE else 'cpu'

	# bfloat16 is chosen only when CUDA is present and the device reports a
	# compute-capability major version of at least 8; otherwise float16.
	if _CUDA_AVAILABLE and torch.cuda.get_device_capability()[0] >= 8:
	    TORCH_TYPE = torch.bfloat16
	else:
	    TORCH_TYPE = torch.float16


	# Reason labels that appear under several steps, spelled once so the
	# per-step entries cannot drift apart.
	_REPATCHING = "Person repatching the tire"
	_LACK_OF_MATERIAL = "Lack of raw material"

	# Maps each step label to the candidate delay reasons offered to the model
	# for that step. Key order is the order the labels are listed in.
	DELAY_REASONS = {
	    "Step 1": ["No raw material available", _REPATCHING],
	    # Steps 2 through 7 share the same two candidates; the comprehension
	    # builds a separate list object for every step.
	    **{f"Step {idx}": [_REPATCHING, _LACK_OF_MATERIAL] for idx in range(2, 8)},
	    "Step 8": ["No person available to collect tire", _REPATCHING],
	}

	def load_video(video_data, strategy='chat', num_frames=24):
	    """Decode a video and return roughly one frame per second as a tensor.

	    For every whole second from 0 up to the last frame timestamp, the frame
	    whose start timestamp is closest to that second is selected. Selection
	    stops once ``num_frames`` frames have been collected.

	    Args:
	        video_data: Either a filesystem path (``str``) or the raw encoded
	            bytes of a video file.
	        strategy: Accepted for interface compatibility; this function does
	            not read it.
	        num_frames: Maximum number of frames to sample. Defaults to 24.

	    Returns:
	        The sampled frames after ``permute(3, 0, 1, 2)``.
	        NOTE(review): presumably this turns (frames, H, W, C) into
	        (C, frames, H, W) -- confirm against decord's torch bridge.

	    Raises:
	        ValueError: If ``num_frames`` is less than 1 or the video has no
	            frames.
	    """
	    if num_frames < 1:
	        raise ValueError("num_frames must be at least 1.")

	    # Make decord hand back torch tensors so .permute() below is available.
	    bridge.set_bridge('torch')

	    if isinstance(video_data, str):
	        decord_vr = VideoReader(video_data, ctx=cpu(0))
	    else:
	        decord_vr = VideoReader(io.BytesIO(video_data), ctx=cpu(0))

	    total_frames = len(decord_vr)
	    if total_frames == 0:
	        # Without this guard the max() below fails with an opaque
	        # "empty sequence" style error.
	        raise ValueError("Video contains no decodable frames.")

	    # First element of each timestamp entry, one value per frame.
	    start_times = np.asarray(
	        [stamp[0] for stamp in decord_vr.get_frame_timestamp(np.arange(total_frames))],
	        dtype=float,
	    )
	    max_second = int(round(float(start_times.max()))) + 1

	    # argmin returns the first index at minimal distance, which matches the
	    # original min(..., key=...) followed by list.index().
	    frame_id_list = [
	        int(np.abs(start_times - second).argmin())
	        for second in range(min(max_second, num_frames))
	    ]

	    video_data = decord_vr.get_batch(frame_id_list)
	    video_data = video_data.permute(3, 0, 1, 2)
	    return video_data

	def load_model():
	    """Load the tokenizer and the 4-bit quantized model for ``MODEL_PATH``.

	    The model is loaded with an NF4 4-bit bitsandbytes configuration that
	    uses double quantization and ``TORCH_TYPE`` as compute dtype, with
	    ``device_map="auto"``, and is switched to eval mode before returning.
	    Both loaders run with ``trust_remote_code=True``, i.e. code shipped
	    with the checkpoint is executed.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

	    nf4_config = BitsAndBytesConfig(
	        load_in_4bit=True,
	        bnb_4bit_quant_type="nf4",
	        bnb_4bit_use_double_quant=True,
	        bnb_4bit_compute_dtype=TORCH_TYPE,
	    )
	    model = AutoModelForCausalLM.from_pretrained(
	        MODEL_PATH,
	        torch_dtype=TORCH_TYPE,
	        trust_remote_code=True,
	        quantization_config=nf4_config,
	        device_map="auto",
	    )
	    # Inference only: switch the module to evaluation mode.
	    model = model.eval()

	    return model, tokenizer

	def predict(prompt, video_data, temperature, model, tokenizer):
	    """Run the model on one video with one text prompt and return its reply.

	    Args:
	        prompt: Text query given to the model.
	        video_data: Video path or raw bytes; forwarded to ``load_video``.
	        temperature: Value placed in the generation settings under
	            ``"temperature"``.
	        model: Loaded model exposing ``build_conversation_input_ids`` and
	            ``generate``.
	        tokenizer: Tokenizer matching ``model``.

	    Returns:
	        The decoded text of the newly generated tokens, with special
	        tokens skipped.
	    """
	    video = load_video(video_data, strategy='chat')

	    conversation = model.build_conversation_input_ids(
	        tokenizer=tokenizer,
	        query=prompt,
	        images=[video],
	        history=[],
	        template_version='chat',
	    )

	    # Add a leading batch dimension and move everything to DEVICE; the
	    # video tensor is additionally cast to the model's compute dtype.
	    inputs = {
	        'input_ids': conversation['input_ids'].unsqueeze(0).to(DEVICE),
	        'token_type_ids': conversation['token_type_ids'].unsqueeze(0).to(DEVICE),
	        'attention_mask': conversation['attention_mask'].unsqueeze(0).to(DEVICE),
	        'images': [[conversation['images'][0].to(DEVICE).to(TORCH_TYPE)]],
	    }

	    # NOTE(review): with do_sample=False decoding is greedy, so top_k,
	    # top_p and temperature are presumably not applied -- confirm against
	    # the transformers version in use.
	    gen_kwargs = {
	        "max_new_tokens": 2048,
	        # Hard-coded pad token id; TODO confirm it matches this tokenizer.
	        "pad_token_id": 128002,
	        "top_k": 1,
	        "do_sample": False,
	        "top_p": 0.1,
	        "temperature": temperature,
	    }

	    with torch.no_grad():
	        # Both dicts must be unpacked into keyword arguments. Passing them
	        # positionally hands a dict to generate() where it expects the
	        # input tensor and the generation config.
	        outputs = model.generate(**inputs, **gen_kwargs)
	        # Drop the prompt tokens so that only the continuation is decoded.
	        outputs = outputs[:, inputs['input_ids'].shape[1]:]
	        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

	    return response

	def get_analysis_prompt(step_number, possible_reasons):
	    """Build the delay-classification prompt for one manufacturing step.

	    Args:
	        step_number: Step identifier. May be a bare number (``3`` or
	            ``"3"``) or a label that already carries the word "Step"
	            (``"Step 3"``); both render as "Step 3" in the prompt.
	        possible_reasons: Iterable of candidate reason strings; they are
	            inserted into the prompt joined by ", ".

	    Returns:
	        The complete prompt text.
	    """
	    # Labels such as "Step 3" already contain the word "Step"; strip it so
	    # the template below does not render "Step Step 3".
	    step_label = str(step_number).strip()
	    if step_label.lower().startswith("step "):
	        step_label = step_label[len("step "):].strip()

	    return f"""You are an AI expert system specialized in analyzing manufacturing processes and identifying production delays in tire manufacturing. Your role is to accurately classify delay reasons based on visual evidence from production line footage.
	Task Context:
	You are analyzing video footage from Step {step_label} of a tire manufacturing process where a delay has been detected. Your task is to determine the most likely cause of the delay from the following possible reasons:
	{', '.join(possible_reasons)}
	Required Analysis:
	1. Carefully observe the video for visual cues indicating production interruption
	2. Compare observed evidence against each possible delay reason
	3. Select the most likely reason based on visual evidence
	Please provide your analysis in the following format:
	1. Selected Reason: [State the most likely reason from the given options]
	2. Visual Evidence: [Describe specific visual cues that support your selection]
	3. Reasoning: [Explain why this reason best matches the observed evidence]
	4. Alternative Analysis: [Brief explanation of why other possible reasons are less likely]
	Important: Base your analysis solely on visual evidence from the video. Focus on concrete, observable details rather than assumptions."""

	# Load the model and tokenizer once, at import time, into module-level
	# globals; inference() reads these names instead of reloading per request.
	model, tokenizer = load_model()

	def inference(video, step_number):
	    """Gradio callback: analyse one uploaded video for a given step.

	    Args:
	        video: Value delivered by the video input; any falsy value is
	            treated as "nothing uploaded".
	        step_number: Step label; must be a key of ``DELAY_REASONS``.

	    Returns:
	        The model's analysis text, or a human-readable message when no
	        video was supplied, the step is unknown, or the analysis failed.
	        This function never raises, so the UI always has text to show.
	    """
	    # Function-scope import so the file's import block stays untouched.
	    import logging

	    if not video:
	        return "Please upload a video first."

	    try:
	        # Look the step up explicitly so an unknown label yields a clear
	        # message instead of the bare repr of a KeyError.
	        possible_reasons = DELAY_REASONS.get(step_number)
	        if possible_reasons is None:
	            return f"Unknown manufacturing step: {step_number!r}."

	        prompt = get_analysis_prompt(step_number, possible_reasons)

	        temperature = 0.8
	        return predict(prompt, video, temperature, model, tokenizer)
	    except Exception as e:
	        # UI boundary: report the failure as text, but keep the traceback
	        # in the logs so the failure can be diagnosed.
	        logging.getLogger(__name__).exception("Delay analysis failed")
	        return f"An error occurred during analysis: {str(e)}"

	# Gradio user interface
	def create_interface():
	    """Assemble the Gradio Blocks UI and return it without launching it.

	    The page has a Markdown header and one row with two columns. The first
	    column holds the video upload, the step dropdown (filled from the keys
	    of ``DELAY_REASONS``) and the analyse button; the second column holds
	    the result textbox. Clicking the button calls ``inference`` with the
	    video and the selected step and writes the result into the textbox.

	    Returns:
	        The constructed ``gr.Blocks`` object.
	    """
	    step_choices = list(DELAY_REASONS)

	    with gr.Blocks() as demo:
	        gr.Markdown("""
	# Manufacturing Delay Analysis System
	Upload a video of the manufacturing step and select the step number.
	The system will analyze the video and determine the most likely cause of delay.
	""")

	        with gr.Row():
	            # First column: inputs and the trigger button.
	            with gr.Column():
	                video_input = gr.Video(
	                    label="Upload Manufacturing Video",
	                    sources=["upload"],
	                )
	                step_dropdown = gr.Dropdown(
	                    choices=step_choices,
	                    label="Manufacturing Step",
	                    value="Step 1",
	                )
	                run_button = gr.Button("Analyze Delay", variant="primary")

	            # Second column: the analysis text.
	            with gr.Column():
	                result_box = gr.Textbox(label="Analysis Result", lines=10)

	        # Wire the button to the analysis callback.
	        run_button.click(
	            fn=inference,
	            inputs=[video_input, step_dropdown],
	            outputs=[result_box],
	        )

	    return demo

	# Script entry point: build the UI and serve it when this file is run directly.
	if __name__ == "__main__":
	    demo = create_interface()
	    # share=True asks Gradio to create a public link in addition to the local one.
	    demo.launch(share=True)