xzuyn committed
Commit 917a01f · verified · 1 Parent(s): e9a4b56

Update app.py

Files changed (1):
  1. app.py +21 -28
app.py CHANGED
@@ -4,34 +4,26 @@ import re
  from PIL import Image
  import os
  import numpy as np
-
  import spaces
  import subprocess
+ import torch
+
+
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

- model = AutoModelForCausalLM.from_pretrained('thwri/CogFlorence-2.1-Large', trust_remote_code=True).eval()#.to("cuda").eval()
- processor = AutoProcessor.from_pretrained('thwri/CogFlorence-2.1-Large', trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(
+     'PJMixers-Dev/Florence-2-base-danbooru2022-316k',
+     trust_remote_code=True,
+     torch_dtype=torch.float32
+ ).eval()
+ processor = AutoProcessor.from_pretrained(
+     'PJMixers-Dev/Florence-2-base-danbooru2022-316k',
+     trust_remote_code=True
+ )

- TITLE = "# [thwri/CogFlorence-2.1-Large](https://huggingface.co/thwri/CogFlorence-2.1-Large/)"
- DESCRIPTION = "[microsoft/Florence-2-large](https://huggingface.co/microsoft/Florence-2-large) tuned on [Ejafa/ye-pop](https://huggingface.co/datasets/Ejafa/ye-pop) captioned with [CogVLM2](https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B)"
+ TITLE = "# [PJMixers-Dev/Florence-2-base-danbooru2022-316k](https://huggingface.co/PJMixers-Dev/Florence-2-base-danbooru2022-316k/)"
+ DESCRIPTION = "[microsoft/Florence-2](https://huggingface.co/microsoft/Florence-2) tuned on [animelover/danbooru2022](https://huggingface.co/datasets/animelover/danbooru2022)."

- def modify_caption(caption: str) -> str:
-     special_patterns = [
-         (r'the image is ', ''),
-         (r'the image captures ', ''),
-         (r'the image showcases ', ''),
-         (r'the image shows ', ''),
-         (r'the image ', ''),
-     ]
-
-     for pattern, replacement in special_patterns:
-         caption = re.sub(pattern, replacement, caption, flags=re.IGNORECASE)
-
-     caption = caption.replace('\n', '').replace('\r', '')
-     caption = re.sub(r'(?<=[.,?!])(?=[^\s])', r' ', caption)
-     caption = ' '.join(caption.strip().splitlines())
-
-     return caption

  @spaces.GPU
  def process_image(image):
@@ -42,19 +34,18 @@ def process_image(image):
      if image.mode != "RGB":
          image = image.convert("RGB")

-     prompt = "<MORE_DETAILED_CAPTION>"
-
-     inputs = processor(text=prompt, images=image, return_tensors="pt")#.to("cuda")
+     inputs = processor(text="<CAPTION>", images=image, return_tensors="pt").to(torch.float32)
      generated_ids = model.generate(
          input_ids=inputs["input_ids"],
          pixel_values=inputs["pixel_values"],
          max_new_tokens=1024,
-         num_beams=3,
+         num_beams=5,
          do_sample=True
      )
      generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-     parsed_answer = processor.post_process_generation(generated_text, task=prompt, image_size=(image.width, image.height))
-     return modify_caption(parsed_answer["<MORE_DETAILED_CAPTION>"])
+
+     return processor.post_process_generation(generated_text, task="<CAPTION>", image_size=(image.width, image.height))
+

  def extract_frames(image_path, output_folder):
      with Image.open(image_path) as img:
@@ -72,6 +63,7 @@ def extract_frames(image_path, output_folder):

      return frame_paths

+
  def process_folder(folder_path):
      if not os.path.isdir(folder_path):
          return "Invalid folder path."
@@ -119,6 +111,7 @@ def process_folder(folder_path):
          processed_files.append(f"Processed {filename} -> {txt_filename}")

      result = "\n".join(processed_files + skipped_files)
+
      return result if result else "No image files found or all files were skipped in the specified folder."

  css = """