Guanzheng committed
Commit 672cd19 · 1 Parent(s): 4e695f0

Update app.py

Files changed (1)
  1. app.py +30 -18
app.py CHANGED
@@ -11,34 +11,46 @@ DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = 4096
 
 DESCRIPTION = """\
-# Llama-2 7B Chat
+# CLEX-7B-Chat-16K
 
-This Space demonstrates model [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-2-7b-chat) by Meta, a Llama 2 model with 7B parameters fine-tuned for chat instructions. Feel free to play with it, or duplicate to run generations without a queue! If you want to run your own service, you can also [deploy the model on Inference Endpoints](https://huggingface.co/inference-endpoints).
+This Space demonstrates the model [CLEX-7B-Chat-16K](https://huggingface.co/DAMO-NLP-SG/CLEX-7B-Chat-16K), a Llama-2-7B model fine-tuned with our [CLEX](https://arxiv.org/abs/2310.16450) method. Feel free to play with it, or duplicate it to run generations without a queue! If you want to run your own service, you can also [deploy the model on Inference Endpoints](https://huggingface.co/inference-endpoints).
 
-🔎 For more details about the Llama 2 family of models and how to use them with `transformers`, take a look [at our blog post](https://huggingface.co/blog/llama2).
-
-🔨 Looking for an even more powerful model? Check out the [13B version](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat) or the large [70B model demo](https://huggingface.co/spaces/ysharma/Explore_llamav2_with_TGI).
+The model now supports a maximum input sequence length of 64K.
+
 """
 
-LICENSE = """
-<p/>
-
----
-As a derivate work of [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-2-7b-chat) by Meta,
-this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
+# LICENSE = """
+# <p/>
+
+# ---
+# As a derivate work of [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-2-7b-chat) by Meta,
+# this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
+# """
+
+
+CITE = """
+If you find our project useful, please star our repo and cite our paper as follows:
+```
+@article{damonlpsg2023clex,
+  author = {Chen, Guanzheng and Li, Xin and Meng, Zaiqiao and Liang, Shangsong and Bing, Lidong},
+  title = {CLEX: Continuous Length Extrapolation for Large Language Models},
+  year = 2023,
+  url = {https://arxiv.org/abs/2310.16450}
+}
+```
 """
 
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
 
-if torch.cuda.is_available():
-    model_id = "DAMO-NLP-SG/CLEX-7b-Chat-16K"
-    # from CLEX import LlamaForCausalLM
-    from transformers import AutoModelForCausalLM
-    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.use_default_system_prompt = False
+# if torch.cuda.is_available():
+model_id = "DAMO-NLP-SG/CLEX-7b-Chat-16K"
+# from CLEX import LlamaForCausalLM
+from transformers import AutoModelForCausalLM
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+tokenizer.use_default_system_prompt = False
 
 import PyPDF2
 from io import BytesIO
@@ -199,7 +211,7 @@ with gr.Blocks(css="style.css") as demo:
     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
 
     chat_interface.render()
-    gr.Markdown(LICENSE)
+    gr.Markdown(CITE)
 
 if __name__ == "__main__":
     demo.queue(max_size=20).launch(share=False)
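This update comments out the `if torch.cuda.is_available():` guard, so the checkpoint is now loaded unconditionally through the plain `transformers` API. Below is a minimal standalone sketch of that loading path plus a single generation call, reusing the constants visible in the diff; the prompt and sampling settings are illustrative assumptions rather than values from app.py, and `trust_remote_code=True` is mentioned only as a possibility (the diff loads the model without it).

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MAX_INPUT_TOKEN_LENGTH = 4096   # truncation limit used by the Space
DEFAULT_MAX_NEW_TOKENS = 1024

model_id = "DAMO-NLP-SG/CLEX-7b-Chat-16K"
# If the repo ships custom CLEX modeling code, loading may additionally
# require trust_remote_code=True (assumption; not shown in the diff).
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.use_default_system_prompt = False

# Illustrative prompt and sampling settings (not taken from app.py).
prompt = "What does continuous length extrapolation mean for LLMs?"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:].to(model.device)  # clip long inputs

output = model.generate(
    input_ids,
    max_new_tokens=DEFAULT_MAX_NEW_TOKENS,
    do_sample=True,
    temperature=0.7,
)
print(tokenizer.decode(output[0, input_ids.shape[1]:], skip_special_tokens=True))
```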
 
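The unchanged context lines at the end of the first hunk import `PyPDF2` and `BytesIO`, which suggests the Space extracts text from uploaded PDFs before handing it to the model. Here is a minimal sketch of what that step might look like, assuming PyPDF2 3.x; the helper name `extract_pdf_text` is hypothetical, as only the imports are visible in this diff.

```python
from io import BytesIO

import PyPDF2


def extract_pdf_text(data: bytes) -> str:
    """Concatenate the text of every page of an in-memory PDF."""
    reader = PyPDF2.PdfReader(BytesIO(data))
    # extract_text() may return None for pages without a text layer.
    return "\n".join(page.extract_text() or "" for page in reader.pages)


# Example: feed the extracted text into the chat prompt.
# text = extract_pdf_text(open("paper.pdf", "rb").read())
```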