iofu728 committed on
Commit
7ea924f
·
1 Parent(s): c68af66

Feature(MInference): update information

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -13,7 +13,10 @@ from transformers.utils.import_utils import _is_package_available
13
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
14
 
15
 
16
- DESCRIPTION = """# MInference 1.0: Accelerating Pre-filling for Long-Context LLMs via Dynamic Sparse Attention (Under Review, ES-FoMo @ ICML'24)
 
 
 
17
 
18
  _Huiqiang Jiang†, Yucheng Li†, Chengruidong Zhang†, Qianhui Wu, Xufang Luo, Surin Ahn, Zhenhua Han, Amir H. Abdi, Dongsheng Li, Chin-Yew Lin, Yuqing Yang and Lili Qiu_
19
 
@@ -73,7 +76,7 @@ if torch.cuda.is_available() and _is_package_available("pycuda"):
73
  terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
74
 
75
 
76
- @spaces.GPU(duration=120)
77
  def chat_llama3_8b(
78
  message: str, history: list, temperature: float, max_new_tokens: int
79
  ) -> str:
 
13
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
14
 
15
 
16
+ DESCRIPTION = """
17
+ <div>
18
+ <h1>MInference 1.0: Accelerating Pre-filling for Long-Context LLMs via Dynamic Sparse Attention (Under Review, ES-FoMo @ ICML'24)</h1>
19
+ </div>
20
 
21
  _Huiqiang Jiang†, Yucheng Li†, Chengruidong Zhang†, Qianhui Wu, Xufang Luo, Surin Ahn, Zhenhua Han, Amir H. Abdi, Dongsheng Li, Chin-Yew Lin, Yuqing Yang and Lili Qiu_
22
 
 
76
  terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
77
 
78
 
79
+ # @spaces.GPU(duration=120)
80
  def chat_llama3_8b(
81
  message: str, history: list, temperature: float, max_new_tokens: int
82
  ) -> str: