Construction_Snag_Tool_Llama_3.2_Vision

Running

App Files Files Community

Construction_Snag_Tool_Llama_3.2_Vision / app.py

capradeepgujaran

Create app.py

4ec8ad4 verified 8 months ago

raw

history blame

1.9 kB

	import gradio as gr
	from PIL import Image
	import torch
	from transformers import AutoProcessor, LlamaForCausalLM, LlamaTokenizer

	# Load the Llama 2 model and processor
	# Note: You'll need to replace these with the actual Llama 3.2 vision model when it becomes available
	model_name = "meta-llama/Llama-2-7b-chat-hf"
	processor = AutoProcessor.from_pretrained(model_name)
	model = LlamaForCausalLM.from_pretrained(model_name)
	tokenizer = LlamaTokenizer.from_pretrained(model_name)

	def analyze_construction_image(image):
	# Process the image
	inputs = processor(images=image, return_tensors="pt")

	# Generate text based on the image
	prompt = "Analyze this construction image and identify the snag category, snag description, and steps to desnag."
	input_ids = tokenizer(prompt, return_tensors="pt").input_ids

	# Concatenate the image embeddings with the text input
	combined_inputs = torch.cat([inputs.pixel_values, input_ids], dim=1)

	# Generate output
	outputs = model.generate(combined_inputs, max_length=300)
	result = tokenizer.decode(outputs[0], skip_special_tokens=True)

	# Parse the result (this is a simplified example)
	lines = result.split('\n')
	snag_category = lines[0] if len(lines) > 0 else "N/A"
	snag_description = lines[1] if len(lines) > 1 else "N/A"
	desnag_steps = lines[2:] if len(lines) > 2 else ["N/A"]

	return snag_category, snag_description, "\n".join(desnag_steps)

	# Create the Gradio interface
	iface = gr.Interface(
	fn=analyze_construction_image,
	inputs=gr.Image(type="pil"),
	outputs=[
	gr.Textbox(label="Snag Category"),
	gr.Textbox(label="Snag Description"),
	gr.Textbox(label="Steps to Desnag")
	],
	title="Construction Image Analyzer",
	description="Upload a construction site image to identify issues and get desnag steps."
	)

	# Launch the app
	iface.launch()