Ketengan-Diffusion-Lab committed (verified)
Commit c33e052 · 1 Parent(s): 323d186

Update app.py

Files changed (1): app.py (+17 -15)
app.py CHANGED

@@ -14,6 +14,7 @@ warnings.filterwarnings('ignore')
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
+# Update model path to your local path
 model_name = 'failspy/kappa-3-phi-abliterated'
 
 # create model and load it to the specified device
@@ -30,37 +31,38 @@ tokenizer = AutoTokenizer.from_pretrained(
 )
 
 def inference(prompt, image, temperature, beam_size):
+    # Phi-3 uses a chat template
     messages = [
-        {"role": "user", "content": f'<image>\n{prompt}'}
+        {"role": "user", "content": f"Can you describe this image?\n{prompt}"}
     ]
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-
-    text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
-    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device)
+
+    # Apply chat template and add generation prompt
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(device)
 
-    image_tensor = model.process_images([image], model.config).to(device)
+    # Process the image
+    pixel_values = model.prepare_image(image).to(device)
 
     # Add debug prints
     print(f"Device of model: {next(model.parameters()).device}")
-    print(f"Device of input_ids: {input_ids.device}")
-    print(f"Device of image_tensor: {image_tensor.device}")
+    print(f"Device of inputs: {inputs.input_ids.device}")
+    print(f"Device of pixel_values: {pixel_values.device}")
 
     # generate
     with torch.cuda.amp.autocast():
        output_ids = model.generate(
-            input_ids,
-            images=image_tensor,
+            inputs.input_ids,
+            pixel_values=pixel_values,
             max_new_tokens=1024,
             temperature=temperature,
             num_beams=beam_size,
             use_cache=True
         )[0]
 
-    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
+    return tokenizer.decode(output_ids[inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()
 
 with gr.Blocks() as demo:
     with gr.Row():
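
For reference, a minimal standalone sketch of the updated inference path follows. Two caveats come from the standard transformers API rather than from this commit: apply_chat_template(..., return_tensors="pt") returns a bare tensor of token ids unless return_dict=True is also passed (which the inputs.input_ids access in the new code relies on), and generate() ignores temperature unless do_sample=True. model.prepare_image and the pixel_values keyword are taken on faith from this commit; they would come from the checkpoint's trust_remote_code implementation, not from transformers itself, and the loading flags shown are illustrative.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = 'failspy/kappa-3-phi-abliterated'

# Loading flags are illustrative; the commit does not show the full loading code.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    trust_remote_code=True
).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

def inference(prompt, image, temperature, beam_size):
    messages = [
        {"role": "user", "content": f"Can you describe this image?\n{prompt}"}
    ]

    # return_dict=True makes apply_chat_template return a BatchEncoding;
    # with return_tensors="pt" alone it returns a bare tensor, and the
    # inputs.input_ids access below would fail.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True
    ).to(device)

    # prepare_image is the name this commit uses; it is assumed to come
    # from the model's remote code, not from transformers itself.
    pixel_values = model.prepare_image(image).to(device)

    with torch.cuda.amp.autocast():
        output_ids = model.generate(
            inputs.input_ids,
            pixel_values=pixel_values,
            max_new_tokens=1024,
            do_sample=temperature > 0,  # temperature has no effect without sampling
            temperature=temperature,
            num_beams=beam_size,
            use_cache=True
        )[0]

    # Decode only the newly generated tokens, skipping the prompt
    return tokenizer.decode(
        output_ids[inputs.input_ids.shape[1]:], skip_special_tokens=True
    ).strip()

If the checkpoint's remote code does not expose prepare_image, the pre-commit call model.process_images([image], model.config) is what this file used before.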
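
The diff cuts off at the start of the Gradio UI. A minimal, hypothetical wiring of inference into the gr.Blocks layout might look like the following; all component names, ranges, and defaults here are illustrative, not taken from app.py.

import gradio as gr

# Hypothetical UI wiring; only gr.Blocks/gr.Row and the inference
# signature are confirmed by the diff above.
with gr.Blocks() as demo:
    with gr.Row():
        image_input = gr.Image(type="pil", label="Image")
        with gr.Column():
            prompt_input = gr.Textbox(label="Prompt")
            temperature_slider = gr.Slider(0.0, 1.5, value=0.7, label="Temperature")
            beam_slider = gr.Slider(1, 5, value=1, step=1, label="Beam size")
            run_button = gr.Button("Generate")
            output_box = gr.Textbox(label="Output")

    # Argument order matches inference(prompt, image, temperature, beam_size)
    run_button.click(
        inference,
        inputs=[prompt_input, image_input, temperature_slider, beam_slider],
        outputs=output_box
    )

demo.launch()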