VishalD1234 committed
Commit 09c19f9 · verified · 1 Parent(s): 23adf2a

Update app.py

Files changed (1)
  1. app.py +1 -109
app.py CHANGED
@@ -323,112 +323,4 @@ def create_interface():
 
 if __name__ == "__main__":
     demo = create_interface()
-    demo.queue().launch(share=True)import gradio as gr
-import torch
-import numpy as np
-from PIL import Image
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-
-MODEL_PATH = "THUDM/cogvlm2-video-llama3-chat"
-DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
-TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
-
-def load_model():
-    """Loads the pre-trained model and tokenizer with quantization configurations."""
-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_compute_dtype=TORCH_TYPE,
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_type="nf4"
-    )
-
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_PATH,
-        torch_dtype=TORCH_TYPE,
-        trust_remote_code=True,
-        quantization_config=quantization_config,
-        device_map="auto"
-    ).eval()
-
-    return model, tokenizer
-
-def predict_image(prompt, image, temperature, model, tokenizer):
-    """Generates predictions based on the image and textual prompt."""
-    image = image.convert("RGB")  # Ensure image is in RGB format
-
-    # Convert image to model-expected format
-    inputs = model.build_conversation_input_ids(
-        tokenizer=tokenizer,
-        query=prompt,
-        images=[image],
-        history=[],
-        template_version='chat'
-    )
-
-    inputs = {
-        'input_ids': inputs['input_ids'].unsqueeze(0).to(DEVICE),
-        'token_type_ids': inputs['token_type_ids'].unsqueeze(0).to(DEVICE),
-        'attention_mask': inputs['attention_mask'].unsqueeze(0).to(DEVICE),
-        'images': [[inputs['images'][0].to(DEVICE).to(TORCH_TYPE)]],
-    }
-
-    gen_kwargs = {
-        "max_new_tokens": 512,
-        "pad_token_id": 128002,
-        "top_k": 1,
-        "do_sample": False,
-        "top_p": 0.1,
-        "temperature": temperature,
-    }
-
-    with torch.no_grad():
-        outputs = model.generate(**inputs, **gen_kwargs)
-        outputs = outputs[:, inputs['input_ids'].shape[1]:]
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    return response
-
-model, tokenizer = load_model()
-
-def inference(image):
-    """Generates a description of the input image."""
-    try:
-        if not image:
-            return "Please upload an image first."
-
-        prompt = "Describe the image and the components observed in the given input image."
-        temperature = 0.3
-        response = predict_image(prompt, image, temperature, model, tokenizer)
-
-        return response
-    except Exception as e:
-        return f"An error occurred during analysis: {str(e)}"
-
-def create_interface():
-    """Creates the Gradio interface for Image Description System."""
-    with gr.Blocks() as demo:
-        gr.Markdown("""
-        # Image Description System
-        Upload an image, and the system will describe the image and its components.
-        """)
-
-        with gr.Row():
-            with gr.Column():
-                image_input = gr.Image(label="Upload Image", type="pil")
-                analyze_btn = gr.Button("Describe Image", variant="primary")
-
-            with gr.Column():
-                output = gr.Textbox(label="Image Description", lines=10)
-
-        analyze_btn.click(
-            fn=inference,
-            inputs=[image_input],
-            outputs=[output]
-        )
-
-    return demo
-
-if __name__ == "__main__":
-    demo = create_interface()
-    demo.queue().launch(share=True)
+    demo.queue().launch(share=True)