ragavsachdeva committed on
Commit
a882846
·
1 Parent(s): ceaf475
Files changed (2)
  1. app.py +59 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,59 @@
+ import gradio as gr
+ from PIL import Image
+ import numpy as np
+ from transformers import AutoModel
+ import torch
+
+ # Load the model (requires a CUDA GPU)
+ model = AutoModel.from_pretrained("ragavsachdeva/magiv2", trust_remote_code=True).cuda().eval()
+
+ def read_image(image):
+     image = Image.open(image).convert("L").convert("RGB")  # greyscale, then back to 3-channel RGB
+     image = np.array(image)
+     return image
+
+ def process_images(chapter_pages, character_bank_images, character_bank_names):
+     chapter_pages = [read_image(image) for image in chapter_pages]
+     character_bank = {
+         "images": [read_image(image) for image in character_bank_images],
+         "names": [name.strip() for name in character_bank_names.split(",")]  # tolerate spaces after commas
+     }
+
+     with torch.no_grad():
+         per_page_results = model.do_chapter_wide_prediction(chapter_pages, character_bank, use_tqdm=True, do_ocr=True)
+
+     output_images = []
+     transcript = []
+     for image, page_result in zip(chapter_pages, per_page_results):
+         output_image = model.visualise_single_image_prediction(image, page_result, filename=None)
+         output_images.append(output_image)
+
+         speaker_name = {  # text box index -> predicted speaker name
+             text_idx: page_result["character_names"][char_idx] for text_idx, char_idx in page_result["text_character_associations"]
+         }
+
+         for j in range(len(page_result["ocr"])):
+             if not page_result["is_essential_text"][j]:  # skip text flagged as non-essential
+                 continue
+             name = speaker_name.get(j, "unsure")
+             transcript.append(f"<{name}>: {page_result['ocr'][j]}")
+
+     transcript_text = "\n".join(transcript)
+
+     return output_images, transcript_text
+
+ # Define the Gradio interface
+ chapter_pages_input = gr.Files(label="Chapter Pages")
+ character_bank_images_input = gr.Files(label="Character Bank Images")
+ character_bank_names_input = gr.Textbox(label="Character Bank Names (comma separated)")
+
+ output_images = gr.Gallery(label="Output Images")
+ transcript_output = gr.Textbox(label="Transcript")
+
+ gr.Interface(
+     fn=process_images,
+     inputs=[chapter_pages_input, character_bank_images_input, character_bank_names_input],
+     outputs=[output_images, transcript_output],
+     title="Tails Tell Tales: Chapter-Wide Manga Transcriptions With Character Names",
+     description="Upload chapter pages and a character bank to get annotated images and a transcript.",
+ ).launch()
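
Note: the same pipeline can be driven without the Gradio UI. Below is a minimal headless sketch that reuses only the methods app.py itself invokes (do_chapter_wide_prediction, visualise_single_image_prediction); the file paths and character name are placeholder assumptions, not part of the commit.

import numpy as np
import torch
from PIL import Image
from transformers import AutoModel

model = AutoModel.from_pretrained("ragavsachdeva/magiv2", trust_remote_code=True).cuda().eval()

def load(path):
    # Same preprocessing as read_image in app.py: greyscale, then 3-channel RGB array
    return np.array(Image.open(path).convert("L").convert("RGB"))

pages = [load("page_01.png"), load("page_02.png")]                   # placeholder paths
character_bank = {"images": [load("hero.png")], "names": ["Hero"]}   # placeholder bank

with torch.no_grad():
    results = model.do_chapter_wide_prediction(pages, character_bank, use_tqdm=True, do_ocr=True)

# Each per-page result exposes (at least) the keys app.py reads:
# "ocr", "is_essential_text", "character_names", "text_character_associations".
for page_result in results:
    print(page_result["ocr"])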
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ transformers
+ einops
+ --extra-index-url https://download.pytorch.org/whl/cu113
+ torch
+ shapely
+ timm
+ scipy
+ tokenizers
+ pulp
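
Note: the --extra-index-url line points pip at CUDA 11.3 wheels for torch, and app.py calls .cuda() at model load, so a CPU-only torch build will not work. A quick sanity check before launching the app (a sketch, not part of the commit):

import torch

# app.py moves the model to the GPU immediately, so fail fast if CUDA is absent.
assert torch.cuda.is_available(), "No CUDA device visible; app.py will fail at model load."
print("torch", torch.__version__, "built for CUDA", torch.version.cuda)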