Spaces:

darknoon
/

chameleon

Build error

darknoon committed on Jul 11, 2024

Commit

d10e0fb

1 Parent(s): 697a1f0

minor changes, based on https://huggingface.co/spaces/huggingface-projects/gemma-2-9b-it

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,10 +12,11 @@ model_path = "facebook/chameleon-7b"
 # model = ChameleonForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, device_map="auto")
 # processor = ChameleonProcessor.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, device_map="auto")
 processor = ChameleonProcessor.from_pretrained(model_path)
 tokenizer = processor.tokenizer
-@spaces.GPU
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -40,11 +41,11 @@ def respond(
     image = Image.open(requests.get("https://uploads4.wikiart.org/images/paul-klee/death-for-the-idea-1915.jpg!Large.jpg", stream=True).raw)
     inputs = processor(prompt, images=[image], return_tensors="pt").to(model.device, dtype=torch.bfloat16)
-    # out = model.generate(**inputs, max_new_tokens=40, do_sample=False)
     streamer = TextIteratorStreamer(tokenizer)
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()

 # model = ChameleonForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, device_map="auto")
 # processor = ChameleonProcessor.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, device_map="auto")
+model.eval()
 processor = ChameleonProcessor.from_pretrained(model_path)
 tokenizer = processor.tokenizer
+@spaces.GPU(duration=90)
 def respond(
     message,
     history: list[tuple[str, str]],
     image = Image.open(requests.get("https://uploads4.wikiart.org/images/paul-klee/death-for-the-idea-1915.jpg!Large.jpg", stream=True).raw)
     inputs = processor(prompt, images=[image], return_tensors="pt").to(model.device, dtype=torch.bfloat16)
     streamer = TextIteratorStreamer(tokenizer)
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
+    # launch generation in the background
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()