Guanzheng committed
Commit e02c5de · 1 Parent(s): 5d8ca76

Update app.py

Files changed (1):
  1. app.py (+5 -2)
app.py CHANGED
@@ -15,7 +15,9 @@ DESCRIPTION = """\
 
 This Space demonstrates model [CLEX-7B-Chat-16K](https://huggingface.co/DAMO-NLP-SG/CLEX-7B-Chat-16K), a Llama-2-7B model fine-tuned using our [CLEX](https://arxiv.org/abs/2310.16450) method. Feel free to play with it, or duplicate to run generations without a queue! If you want to run your own service, you can also [deploy the model on Inference Endpoints](https://huggingface.co/inference-endpoints).
 
-The model supports the maximun input sequence length of 64k now.
+The web demo now supports a maximum input sequence length of 10k (longer inputs will probably OOM).
+
+Support for PDF input is tentative.
 
 """
 
@@ -35,6 +37,7 @@ If you find our project useful, hope you can star our repo and cite our paper as
   author = {Chen, Guanzheng and Li, Xin and Meng, Zaiqiao and Liang, Shangsong and Bing, Lidong},
   title = {CLEX: Continuous Length Extrapolation for Large Language Models},
   year = 2023,
+  journal = {arXiv preprint arXiv:2310.16450},
   url = {https://arxiv.org/abs/2310.16450}
 }
 ```
@@ -128,7 +131,7 @@ def generate(
     # for user, assistant in chat_history:
     #     conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     # conversation.append({"role": "user", "content": message})
-    print(prompt[500:1000])
+    # print(prompt[500:1000])
     # chat = tokenizer.apply_chat_template(conversation, tokenize=False)
     inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to("cuda")
     if len(inputs) > MAX_INPUT_TOKEN_LENGTH:
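For reference, the chat-history path that stays commented out in this hunk would, if re-enabled, look roughly like the sketch below. This is a reconstruction from the commented lines, not code from the commit; `message`, `chat_history`, and `tokenizer` are assumed to match the `generate()` signature of a typical Gradio chat app.

```python
from transformers import PreTrainedTokenizerBase

def build_prompt(message: str,
                 chat_history: list[tuple[str, str]],
                 tokenizer: PreTrainedTokenizerBase) -> str:
    # Hypothetical reconstruction of the commented-out conversation builder.
    conversation = []
    for user, assistant in chat_history:
        conversation.extend([
            {"role": "user", "content": user},
            {"role": "assistant", "content": assistant},
        ])
    conversation.append({"role": "user", "content": message})
    # tokenize=False returns the formatted prompt string rather than token ids.
    return tokenizer.apply_chat_template(conversation, tokenize=False)
```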
 
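Separately, note the unchanged context line `if len(inputs) > MAX_INPUT_TOKEN_LENGTH:`: `len()` on the `BatchEncoding` that a transformers tokenizer returns counts its keys (`input_ids`, `attention_mask`, ...), not tokens. Below is a minimal sketch of a guard that measures and truncates the actual token count; the 10_000 value is an assumption matching the 10k limit mentioned in the new DESCRIPTION, and this is an illustration, not code from the commit.

```python
from transformers import AutoTokenizer

# Assumed to mirror the 10k limit announced in the updated DESCRIPTION.
MAX_INPUT_TOKEN_LENGTH = 10_000

tokenizer = AutoTokenizer.from_pretrained("DAMO-NLP-SG/CLEX-7B-Chat-16K")

def tokenize_with_guard(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
    # Compare the token count (shape of input_ids), not len(inputs), which
    # would count the BatchEncoding's keys instead of its tokens.
    if inputs.input_ids.shape[-1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep the most recent tokens so the end of the prompt survives.
        inputs["input_ids"] = inputs.input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        inputs["attention_mask"] = inputs.attention_mask[:, -MAX_INPUT_TOKEN_LENGTH:]
    return inputs
```

Truncating from the left keeps the latest turns of the conversation, which is the usual choice for chat demos.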