Update app.py

app.py CHANGED
@@ -11,34 +11,46 @@ DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = 4096
 
 DESCRIPTION = """\
-#
+# CLEX-7B-Chat-16K
 
-This Space demonstrates model [
+This Space demonstrates model [CLEX-7B-Chat-16K](https://huggingface.co/DAMO-NLP-SG/CLEX-7B-Chat-16K), a Llama-2-7B model fine-tuned using our [CLEX](https://arxiv.org/abs/2310.16450) method. Feel free to play with it, or duplicate it to run generations without a queue! If you want to run your own service, you can also [deploy the model on Inference Endpoints](https://huggingface.co/inference-endpoints).
 
-
+The model now supports a maximum input sequence length of 64K.
 
-🔨 Looking for an even more powerful model? Check out the [13B version](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat) or the large [70B model demo](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI).
 """
 
-LICENSE = """
-<p/>
-
----
-As a derivative work of [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-2-7b-chat) by Meta,
-this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
+# LICENSE = """
+# <p/>
+
+# ---
+# As a derivative work of [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-2-7b-chat) by Meta,
+# this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
+# """
+
+
+CITE = """
+If you find our project useful, we hope you will star our repo and cite our paper as follows:
+```
+@article{damonlpsg2023clex,
+  author = {Chen, Guanzheng and Li, Xin and Meng, Zaiqiao and Liang, Shangsong and Bing, Lidong},
+  title = {CLEX: Continuous Length Extrapolation for Large Language Models},
+  year = 2023,
+  url = {https://arxiv.org/abs/2310.16450}
+}
+```
+"""
 
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
 
-if torch.cuda.is_available():
-
-
-
-
-
-
+# if torch.cuda.is_available():
+model_id = "DAMO-NLP-SG/CLEX-7b-Chat-16K"
+# from CLEX import LlamaForCausalLM
+from transformers import AutoModelForCausalLM
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+tokenizer.use_default_system_prompt = False
 
 import PyPDF2
 from io import BytesIO
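In plain form, the loading path this hunk switches to looks roughly like the sketch below. One caveat: Hub checkpoints that ship custom modeling code (which the commented-out `from CLEX import LlamaForCausalLM` hints at) usually need `trust_remote_code=True` with the Auto classes; that flag is an assumption here, not something this commit adds.

```
# Hedged sketch of the new loading path, not the Space's exact code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "DAMO-NLP-SG/CLEX-7b-Chat-16K"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half precision, as in the diff
    device_map="auto",          # let accelerate place layers on available devices
    trust_remote_code=True,     # assumption: needed if the repo bundles CLEX code
)

# Prompts are still capped by MAX_INPUT_TOKEN_LENGTH = 4096 (context line
# above); a common pattern is to keep only the most recent tokens:
MAX_INPUT_TOKEN_LENGTH = 4096
input_ids = tokenizer("Hello!", return_tensors="pt").input_ids
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
```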
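The unchanged trailing imports (`import PyPDF2`, `from io import BytesIO`) suggest the app also extracts text from uploaded PDFs to feed the long-context model. A minimal sketch of that step; the helper name `pdf_to_text` is hypothetical, not part of the diff:

```
import PyPDF2
from io import BytesIO

def pdf_to_text(data: bytes) -> str:
    # Parse the uploaded bytes in memory and join the per-page text.
    reader = PyPDF2.PdfReader(BytesIO(data))
    return "\n".join(page.extract_text() or "" for page in reader.pages)
```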
@@ -199,7 +211,7 @@ with gr.Blocks(css="style.css") as demo:
     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
 
     chat_interface.render()
-    gr.Markdown(LICENSE)
+    gr.Markdown(CITE)
 
 if __name__ == "__main__":
     demo.queue(max_size=20).launch(share=False)
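For orientation, here is roughly how the names in this hunk fit together in a Gradio app. This is a hedged reconstruction of parts outside the diff: `generate` and the placeholder strings are hypothetical stand-ins.

```
import gradio as gr

DESCRIPTION = "..."  # placeholder; built in the first hunk
CITE = "..."         # placeholder; the citation block this commit adds

def generate(message, history):
    # Hypothetical chat handler; the real app runs the CLEX model here.
    return "..."

chat_interface = gr.ChatInterface(fn=generate)

with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    chat_interface.render()
    gr.Markdown(CITE)  # renders the citation where LICENSE used to appear

if __name__ == "__main__":
    demo.queue(max_size=20).launch(share=False)  # cap the request queue at 20
```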