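Minor changes (call model.eval() after loading, set @spaces.GPU(duration=90), remove the commented-out model.generate call, add a comment on the background generation thread), based on https://huggingface.co/spaces/huggingface-projects/gemma-2-9b-it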
Browse files
app.py
CHANGED
@@ -12,10 +12,11 @@ model_path = "facebook/chameleon-7b"
|
|
12 |
# model = ChameleonForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, device_map="auto")
|
13 |
# processor = ChameleonProcessor.from_pretrained(model_path)
|
14 |
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, device_map="auto")
|
|
|
15 |
processor = ChameleonProcessor.from_pretrained(model_path)
|
16 |
tokenizer = processor.tokenizer
|
17 |
|
18 |
-
@spaces.GPU
|
19 |
def respond(
|
20 |
message,
|
21 |
history: list[tuple[str, str]],
|
@@ -40,11 +41,11 @@ def respond(
|
|
40 |
image = Image.open(requests.get("https://uploads4.wikiart.org/images/paul-klee/death-for-the-idea-1915.jpg!Large.jpg", stream=True).raw)
|
41 |
|
42 |
inputs = processor(prompt, images=[image], return_tensors="pt").to(model.device, dtype=torch.bfloat16)
|
43 |
-
# out = model.generate(**inputs, max_new_tokens=40, do_sample=False)
|
44 |
|
45 |
streamer = TextIteratorStreamer(tokenizer)
|
46 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
|
47 |
|
|
|
48 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
49 |
thread.start()
|
50 |
|
|
|
12 |
# model = ChameleonForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, device_map="auto")
|
13 |
# processor = ChameleonProcessor.from_pretrained(model_path)
|
14 |
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, device_map="auto")
|
15 |
+
model.eval()
|
16 |
processor = ChameleonProcessor.from_pretrained(model_path)
|
17 |
tokenizer = processor.tokenizer
|
18 |
|
19 |
+
@spaces.GPU(duration=90)
|
20 |
def respond(
|
21 |
message,
|
22 |
history: list[tuple[str, str]],
|
|
|
41 |
image = Image.open(requests.get("https://uploads4.wikiart.org/images/paul-klee/death-for-the-idea-1915.jpg!Large.jpg", stream=True).raw)
|
42 |
|
43 |
inputs = processor(prompt, images=[image], return_tensors="pt").to(model.device, dtype=torch.bfloat16)
|
|
|
44 |
|
45 |
streamer = TextIteratorStreamer(tokenizer)
|
46 |
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
|
47 |
|
48 |
+
# launch generation in the background
|
49 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
50 |
thread.start()
|
51 |
|