tstone87 commited on
Commit
8f73070
·
verified ·
1 Parent(s): 6b8cb6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -1,37 +1,45 @@
1
  import gradio as gr
2
- from transformers import AutoModel
3
  from PIL import Image
4
  import torch
5
  import pdfplumber
6
 
7
- # Load the model
8
- model = AutoModel.from_pretrained("deepseek-ai/Janus-1.3B")
 
 
9
 
10
  def process_input(input_data):
11
  if isinstance(input_data, str):
12
  return handle_text(input_data)
13
  elif isinstance(input_data, Image.Image):
14
  return handle_image(input_data)
15
- elif isinstance(input_data, bytes):
16
- return handle_pdf(input_data)
17
  else:
18
  return "Unsupported input type."
19
 
20
  def handle_text(text):
21
- return f"Processed text: {text}"
 
 
22
 
23
  def handle_image(image):
24
  return "Image processing not implemented yet."
25
 
26
- def handle_pdf(pdf_bytes):
27
- with pdfplumber.open(pdf_bytes) as pdf:
28
  text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
29
  return handle_text(text)
30
 
31
  # Create Gradio app
32
  iface = gr.Interface(
33
  fn=process_input,
34
- inputs=[gr.Textbox(label="Enter text"), gr.Image(label="Upload image"), gr.File(label="Upload PDF")],
 
 
 
 
35
  outputs=gr.Textbox(),
36
  title="Multimodal Chatbot",
37
  description="Handles text, images, and PDFs with the same entry point."
 
1
  import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  from PIL import Image
4
  import torch
5
  import pdfplumber
6
 
7
# Load the model and tokenizer once at import time so every request reuses
# the same instances instead of re-downloading/re-initializing per call.
# NOTE(review): Janus-1.3B is a multimodal model with a custom architecture;
# AutoModelForCausalLM may require trust_remote_code=True to load it — confirm
# on first deployment.
model_name = "deepseek-ai/Janus-1.3B"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
11
 
12
def process_input(input_data, *extra_inputs):
    """Route an incoming value to the matching type-specific handler.

    Gradio calls ``fn`` with one positional argument per input component
    (this app declares three: Textbox, Image, File), so accept the extra
    values and dispatch on the first one of a recognized type. Calling
    with a single argument keeps the original behavior.

    Args:
        input_data: Value from the first input component.
        *extra_inputs: Values from any additional input components.

    Returns:
        str: The handler's result, or an error message when no value has
        a supported type.
    """
    for value in (input_data, *extra_inputs):
        if isinstance(value, str):
            return handle_text(value)
        if isinstance(value, Image.Image):
            return handle_image(value)
        # gr.File supplies a dict-like payload; the temp file path is
        # stored under the "name" key.
        if isinstance(value, dict) and "name" in value:
            return handle_pdf(value["name"])
    return "Unsupported input type."
21
 
22
def handle_text(text):
    """Generate a model response for *text*.

    Args:
        text: Prompt string fed to the tokenizer.

    Returns:
        str: The decoded generation. The full output sequence is decoded,
        so the prompt text is included in the result.
    """
    inputs = tokenizer(text, return_tensors="pt")
    # Inference only: disable autograd so generate() does not build a
    # gradient graph (saves memory and time).
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
26
 
27
def handle_image(image):
    """Placeholder handler: image support is not wired up yet.

    Args:
        image: The uploaded image (currently ignored).

    Returns:
        str: A fixed not-implemented notice.
    """
    notice = "Image processing not implemented yet."
    return notice
29
 
30
def handle_pdf(pdf_path):
    """Extract the text of the PDF at *pdf_path* and run it through handle_text.

    Args:
        pdf_path: Filesystem path to the uploaded PDF.

    Returns:
        str: The model response for the concatenated page text. Pages with
        no extractable text are skipped.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Call extract_text() once per page — the original called it twice
        # (filter + join), re-parsing every page's layout a second time.
        page_texts = (page.extract_text() for page in pdf.pages)
        text = "\n".join(t for t in page_texts if t)
    # File is closed before the (potentially slow) generation step.
    return handle_text(text)
34
 
35
  # Create Gradio app
36
  iface = gr.Interface(
37
  fn=process_input,
38
+ inputs=[
39
+ gr.Textbox(label="Enter text"),
40
+ gr.Image(label="Upload image"),
41
+ gr.File(label="Upload PDF")
42
+ ],
43
  outputs=gr.Textbox(),
44
  title="Multimodal Chatbot",
45
  description="Handles text, images, and PDFs with the same entry point."