zR committed
Commit 85c524d
1 Parent(s): b5b22a9
Files changed (1)
  1. app.py +21 -13
app.py CHANGED
@@ -1,3 +1,8 @@
+import subprocess
+
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
+               shell=True)
+
 import gradio as gr
 import torch
 from transformers import (
@@ -30,7 +35,10 @@ def convert_to_txt(file):
 model_name = "THUDM/LongCite-glm4-9b"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
-    model_name, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto"
+    model_name,
+    torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
+    attn_implementation="flash_attention_2",
 )
 
 html_styles = """<style>
@@ -110,20 +118,20 @@ def convert_to_html(statements, clicked=-1):
         html += "\n"
         if clicked == i:
             clicked_cite_html = (
-                html_styles
-                + """<br><span class="label">Citations of current statement:</span><br><div style="overflow-y: auto; padding: 20px; border: 0px dashed black; border-radius: 6px; background-color: #EFF2F6;">{}</div>""".format(
-                    "<br><br>\n".join(cite_html)
-                )
+                html_styles
+                + """<br><span class="label">Citations of current statement:</span><br><div style="overflow-y: auto; padding: 20px; border: 0px dashed black; border-radius: 6px; background-color: #EFF2F6;">{}</div>""".format(
+                    "<br><br>\n".join(cite_html)
                )
-    all_cite_html = (
-        html_styles
-        + """<br><span class="label">All citations:</span><br>\n<div style="overflow-y: auto; padding: 20px; border: 0px dashed black; border-radius: 6px; background-color: #EFF2F6;">{}</div>""".format(
-            "<br><br>\n".join(all_cite_html).replace(
-                '<span class="highlight">', "<span>"
            )
-            if len(all_cite_html)
-            else "No citation in the answer"
+    all_cite_html = (
+        html_styles
+        + """<br><span class="label">All citations:</span><br>\n<div style="overflow-y: auto; padding: 20px; border: 0px dashed black; border-radius: 6px; background-color: #EFF2F6;">{}</div>""".format(
+            "<br><br>\n".join(all_cite_html).replace(
+                '<span class="highlight">', "<span>"
            )
+            if len(all_cite_html)
+            else "No citation in the answer"
+        )
    )
     return html, all_cite_html, clicked_cite_html, cite_num2idx
 
@@ -136,7 +144,7 @@ def render_context(file):
         raise gr.Error(f"ERROR: no uploaded document")
 
 
-@spaces.GPU()
+@spaces.GPU(duration=120)
 def run_llm(context, query):
     if not context:
         raise gr.Error("Error: no uploaded document")