Spaces:

Jimhugging
/

CogVLM2-4-Doc

Sleeping

App Files Community

DoctorSlimm committed on May 26, 2024

Commit

a539d3b

verified ·

1 Parent(s): 9af21de

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -63

app.py CHANGED Viewed

@@ -1,81 +1,137 @@
 import os
-import spaces
 import torch
 import gradio as gr
-# cpu
-zero = torch.Tensor([0]).cuda()
-print(zero.device) # <-- 'cpu' 🤔
-# gpu
-model = None
 @spaces.GPU
-def greet(prompts, separator):
-    # print(zero.device) # <-- 'cuda:0' 🤗
-    from vllm import SamplingParams, LLM
-    from transformers.utils import move_cache
-    from huggingface_hub import snapshot_download, login
-    global model
-    if model is None:
-        LLM_MODEL_ID = "DoctorSlimm/trim-music-31"
-        # LLM_MODEL_ID = "mistral-community/Mistral-7B-v0.2"
-        # LLM_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
-        os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '1'
-        fp = snapshot_download(LLM_MODEL_ID, token=os.getenv('HF_TOKEN'), revision='main')
-        move_cache()
-        model = LLM(fp)
-    sampling_params = dict(
-            temperature = 0.01,
-            ignore_eos = False,
-            max_tokens = int(512 * 2)
-    )
-    sampling_params = SamplingParams(**sampling_params)
-    multi_prompt = False
-    separator = separator.strip()
-    if separator in prompts:
-        multi_prompt = True
-        prompts = prompts.split(separator)
-    else:
-        prompts = [prompts]
-    for idx, pt in enumerate(prompts):
-        print()
-        print(f'[{idx}]:')
-        print(pt)
-    model_outputs = model.generate(prompts, sampling_params)
-    generations = []
-    for output in model_outputs:
-        for outputs in output.outputs:
-            generations.append(outputs.text)
-    if multi_prompt:
-        return separator.join(generations)
-    return generations[0]
 ## make predictions via api ##
 # https://www.gradio.app/guides/getting-started-with-the-python-client#connecting-a-general-gradio-app
 demo = gr.Interface(
-    fn=greet,
-    inputs=[
-        gr.Text(
-            value='hello sir!<SEP>bonjour madame...',
-            placeholder='hello sir!<SEP>bonjour madame...',
-            label='list of prompts separated by separator'
-        ),
-        gr.Text(
-            value='<SEP>',
-            placeholder='<SEP>',
-            label='separator for your prompts'
-        )],
-    outputs=gr.Text()
 )
-demo.launch(share=True)

 import os
 import torch
+import spaces
 import gradio as gr
+from PIL import Image
+from transformers.utils import move_cache
+from huggingface_hub import snapshot_download
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Load the model and tokenizer
+MODEL_PATH = "THUDM/cogvlm2-llama3-chat-19B"
+os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '1'
+MODEL_PATH = snapshot_download(MODEL_PATH)
+move_cache()
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_PATH,
+    torch_dtype=TORCH_TYPE,
+    trust_remote_code=True,
+).to(DEVICE).eval()
 @spaces.GPU
+def generate_caption(image, prompt):
+    # Process the image and the prompt
+    text_only_template = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {} ASSISTANT:"
+    # inputs = processor(texts=[prompt], images=[image], return_tensors="pt").to('cuda') # move inputs to cuda
+    return
 ## make predictions via api ##
 # https://www.gradio.app/guides/getting-started-with-the-python-client#connecting-a-general-gradio-app
 demo = gr.Interface(
+    fn=generate_caption,
+    inputs=[gr.Image(type="pil", label="Upload Image"), gr.Textbox(label="Prompt", value="Describe the image in great detail")],
+    outputs=gr.Textbox(label="Generated Caption"),
+    description=description
+)
+# Launch the interface
+demo.launch(share=True)
+####### ML CODE #######
+import torch
+from PIL import Image
+from transformers import AutoModelForCausalLM, AutoTokenizer
+MODEL_PATH = "THUDM/cogvlm2-llama3-chat-19B"
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True
 )
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_PATH,
+    torch_dtype=TORCH_TYPE,
+    trust_remote_code=True,
+).to(DEVICE).eval()
+text_only_template = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {} ASSISTANT:"
+while True:
+    image_path = input("image path >>>>> ")
+    if image_path == '':
+        print('You did not enter image path, the following will be a plain text conversation.')
+        image = None
+        text_only_first_query = True
+    else:
+        image = Image.open(image_path).convert('RGB')
+    history = []
+    while True:
+        query = input("Human:")
+        if query == "clear":
+            break
+        if image is None:
+            if text_only_first_query:
+                query = text_only_template.format(query)
+                text_only_first_query = False
+            else:
+                old_prompt = ''
+                for _, (old_query, response) in enumerate(history):
+                    old_prompt += old_query + " " + response + "\n"
+                query = old_prompt + "USER: {} ASSISTANT:".format(query)
+        if image is None:
+            input_by_model = model.build_conversation_input_ids(
+                tokenizer,
+                query=query,
+                history=history,
+                template_version='chat'
+            )
+        else:
+            input_by_model = model.build_conversation_input_ids(
+                tokenizer,
+                query=query,
+                history=history,
+                images=[image],
+                template_version='chat'
+            )
+        inputs = {
+            'input_ids': input_by_model['input_ids'].unsqueeze(0).to(DEVICE),
+            'token_type_ids': input_by_model['token_type_ids'].unsqueeze(0).to(DEVICE),
+            'attention_mask': input_by_model['attention_mask'].unsqueeze(0).to(DEVICE),
+            'images': [[input_by_model['images'][0].to(DEVICE).to(TORCH_TYPE)]] if image is not None else None,
+        }
+        gen_kwargs = {
+            "max_new_tokens": 2048,
+            "pad_token_id": 128002,
+        }
+        with torch.no_grad():
+            outputs = model.generate(**inputs, **gen_kwargs)
+            outputs = outputs[:, inputs['input_ids'].shape[1]:]
+            response = tokenizer.decode(outputs[0])
+            response = response.split("<|end_of_text|>")[0]
+            print("\nCogVLM2:", response)
+        history.append((query, response))