File size: 8,653 Bytes
140793a 0b15f14 140793a 0b15f14 140793a 0b15f14 140793a fca63f5 0b15f14 7b98782 140793a c683fd1 140793a b562a52 a4f602f 58024e1 a4f602f 58024e1 0fc355c a4f602f d41d3c0 a4f602f d41d3c0 b562a52 a7a18ce b562a52 2a75905 b562a52 2a75905 6f44849 b562a52 d41d3c0 b562a52 d41d3c0 83a6345 dc4eae6 140793a dc4eae6 83a6345 140793a dc4eae6 140793a 83a6345 140793a 83a6345 f85192b 16d8914 a4f602f f85192b a4f602f b2b04da 75b57b8 875c9b5 a4f602f b0e60d8 b2b04da 2c9f508 a4f602f 58024e1 b0e60d8 a4f602f f85192b e96a163 ced62d8 f84c25c e2b8bf1 ac25f50 e2b8bf1 ac25f50 e2b8bf1 9c1abb5 140793a ced62d8 140793a 0b15f14 140793a 0b15f14 140793a 736d538 0b15f14 140793a 72270c7 b7adee1 3eabe69 140793a 72270c7 8657d51 72270c7 b456778 3eabe69 140793a ced62d8 140793a ced62d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
import json
import os
import shutil
import requests
import gradio as gr
from huggingface_hub import Repository, InferenceClient
# Hugging Face access token; None leaves requests unauthenticated.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Hosted inference endpoint for the Falcon-180B chat model.
API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
BOT_NAME = "Falcon"
# Strings that mark the end of a model turn; trimmed from streamed output in `generate`.
STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]
# ChatInterface example inputs: either free-text keywords or arXiv IDs (August 2023 set).
EXAMPLES = [["climate change"], ["2308.15699"], ["hallucination"], ["2308.00205"], ["large language model"], ["2308.05204"], ["2308.10873"], ["2308.06355"],["2308.01684"],["2308.00352"],["2308.07773"]]
# Streaming text-generation client for the Falcon endpoint.
client = InferenceClient(
    API_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
)
# Load local arXiv metadata shards "arxiv_2023_0" .. "arxiv_2023_3"
# (JSON Lines: one article object per line) into a dict keyed by arXiv ID.
# NOTE(review): assumes each record has at least 'id', 'title', 'abstract'
# keys — 'title'/'abstract' are read later in `generate`; confirm shard schema.
id_dict = {}
for i in range(0,4):
    fname = "arxiv_2023_" + str(i)
    with open(fname, "r") as f:
        for line in f:
            D = json.loads(line)
            id_dict[D['id']] = D
def format_prompt_summarize(message, history, system_prompt, keyword):
    """Build the multi-article ("keyword" mode) instruction prompt for Falcon.

    Parameters
    ----------
    message : str
        Pre-assembled "ARTICLE n / TITLE / ABSTRACT" text plus a trailing
        "Keyword:" line (assembled by `generate`).
    history : list
        Chat history from gr.ChatInterface; accepted for interface
        compatibility but intentionally unused — each request is stateless.
    system_prompt : str
        Optional system prompt from the UI; likewise unused here.
    keyword : str
        The user's search keyword, interpolated into the instructions.

    Returns
    -------
    str
        The full instruction prompt ending with the article text.

    Fixes vs. original: removed dead code that built a chat-style
    "mock_prompt" from `history` but never used it (its `system_prompt == ""`
    check was also inverted); fixed the "must be be" typo in the instructions.
    """
    prompt = ""
    prompt += "System: You are scholarly RESEARCH ASSISTANT who can read the ARXIV scholarly article.\n"
    prompt += "User: READ ALL THE TITLEs and ABSTRACTs of various article below\n"
    prompt += "Generate a SUMMARY of all the articles below relevant to the research for the field of \"" + keyword + "\"\n"
    prompt += "SUGGEST FIVE IMPORTANT FINDINGS or ORIGINAL CONTRIBUTIONS of OBSERVATIONs for the field of \"" + keyword + "\" that summarizes the work.\n"
    prompt += "Each BULLET POINT must be less than 15 WORDS. \n"
    prompt += "Output the FIVE KEY FINDINGS as BULLET POINTS with UNDERLINE OR BOLDEN KEY PHRASES.\n"
    prompt += "Propose ONE CREATIVE ACTIONABLE IDEA for FUTURE extension of the RESEARCH\n. You MUST output the CREATIVE IDEA with a BULB OR IDEA OR THINKING emoji.\n"
    prompt += "Output ONE CREATIVE IDEA for FUTURE extension with a RANDOM emoji\n"
    prompt += "Choose an UNRELATED or ORTHOGONAL field where the FINDINGS of the article can be applied.\n"
    prompt += "In a new line, OUTPUT ONE CRAZY IDEA in 20 WORDS how the KEY FINDINGS of RESEARCH article can be applied in an ORTHOGONAL or UNRELATED FIELD with a CRAZY IDEA emoji \n"
    prompt += message + "\n"
    return prompt
def format_prompt(message, history, system_prompt):
    """Build the single-article ("arXiv ID" mode) instruction prompt for Falcon.

    Parameters
    ----------
    message : str
        "TITLE: ... ABSTRACT: ..." text for one article (assembled by `generate`).
    history : list
        Chat history from gr.ChatInterface; accepted for interface
        compatibility but intentionally unused — each request is stateless.
    system_prompt : str
        Optional system prompt from the UI; likewise unused here.

    Returns
    -------
    str
        The full instruction prompt ending with "User:" plus the article text.

    Fixes vs. original: removed dead code that built a chat-style
    "mock_prompt" from `history` but never used it (its `system_prompt == ""`
    check was also inverted); fixed the "must be be" typo in the instructions.
    """
    prompt = ""
    prompt += "System: You are scholarly RESEARCH ASSISTANT who can read the ARXIV scholarly article.\n"
    prompt += "READ THE TITLE and ABSTRACT of the article below\n"
    prompt += "After understanding the ABSTRACT, SUGGEST 4 IMPORTANT FINDINGS or ORIGINAL CONTRIBUTIONS of OBSERVATIONs that summarizes the work.\n"
    prompt += "Each BULLET POINT must be less than 15 WORDS. \n"
    prompt += "Output the FOUR KEY FINDINGS as BULLET POINTS with UNDERLINE OR BOLDEN KEY PHRASES.\n"
    prompt += "Propose ONE CREATIVE ACTIONABLE IDEA for FUTURE extension of the RESEARCH\n. You MUST output the CREATIVE IDEA with a BULB OR IDEA OR THINKING emoji.\n"
    prompt += "Output ONE CREATIVE IDEA for FUTURE extension with a RANDOM emoji\n"
    prompt += "Choose an UNRELATED or ORTHOGONAL field where the FINDINGS of the article can be applied.\n"
    prompt += "In a new line, OUTPUT ONE CRAZY IDEA in 20 WORDS how the KEY FINDINGS of RESEARCH article can be applied in an ORTHOGONAL or UNRELATED FIELD with a CRAZY IDEA emoji \n"
    prompt += "User:" + message + "\n"
    return prompt
# Per-process RNG seed for sampling; incremented on every call so repeated
# identical queries do not return identical generations.
# NOTE(review): a plain global increment is not thread-safe — confirm this is
# acceptable under Gradio's queued concurrency.
seed = 42
def generate(
    prompt, history, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    """ChatInterface callback: stream a Falcon summary for the user input.

    If `prompt` is a known arXiv ID in `id_dict`, summarize that single
    article; otherwise treat `prompt` as a keyword and summarize up to 4
    matching articles. Yields progressively longer `output` strings as
    tokens stream in (generator protocol expected by gr.ChatInterface).
    """
    temperature = float(temperature)
    # Clamp temperature away from zero; the endpoint rejects non-positive values.
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)
    global seed
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=seed,
    )
    # Bump the seed so the next call samples differently.
    seed = seed + 1
    title = "INPUT ARXI ID"
    abstract = ""
    if prompt in id_dict:
        # arXiv-ID mode: summarize the one matching article.
        title = id_dict[prompt]['title']
        abstract = id_dict[prompt]['abstract']
        prompt = f"TITLE: {title} ABSTRACT: {abstract}\n"
        # Seed the visible output with an HTML-formatted title line.
        output = f"<b>Title: </b> {title} \n <br>"
        formatted_prompt = format_prompt(prompt, history, system_prompt)
    else:
        # Keyword mode: collect up to 4 articles whose title or abstract
        # contains the keyword (case-sensitive substring match).
        keyword = prompt
        counter= 0
        for d in id_dict:
            title = id_dict[d]['title']
            abstract = id_dict[d]['abstract']
            if keyword in title or keyword in abstract:
                counter+=1## its a hit
                # Append the hit to the prompt being built for the model.
                prompt += "ARTICLE " + str(counter) + "\n"
                prompt += f"TITLE: {title} ABSTRACT: {abstract}\n"
            if counter >= 4:
                break
        prompt += "Keyword: " + keyword + "\n"
        formatted_prompt = format_prompt_summarize(prompt, history, system_prompt, keyword)
        output = "Articles related to the keyword " + keyword + "\n"
    # Stream tokens from the inference endpoint.
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    #output = ""
    for response in stream:
        output += response.token.text
        # Trim any stop sequence the model emitted at the end of the text.
        for stop_str in STOP_SEQUENCES:
            if output.endswith(stop_str):
                output = output[:-len(stop_str)]
                output = output.rstrip()
        yield output
    yield output
    return output
# Extra controls shown under "Additional inputs" in the ChatInterface.
# Order matters: these map positionally onto `generate`'s
# (system_prompt, temperature, max_new_tokens, top_p, repetition_penalty)
# parameters after (prompt, history).
additional_inputs=[
    gr.Textbox("", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=8192,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]
# ---- UI layout: banner + description on top, chat interface below. --------
# Fix vs. original: the Markdown used `</b>CRAZY IDEA</b>` — the opening
# bold tag was written as a closing tag, breaking the markup; corrected to
# `<b>CRAZY IDEA</b>`.
with gr.Blocks() as demo:
    with gr.Row():
        # NOTE(review): fractional `scale` is accepted by gradio 3.x only;
        # gradio 4+ requires an int — confirm the pinned gradio version.
        with gr.Column(scale=0.4):
            gr.Image("better_banner.jpeg", elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.Markdown(
                """
#
** The idea is inspired by CREATIVE WHACK PACK https://apps.apple.com/us/app/creative-whack-pack/id307306326
** ##Researchers need INSPIRATION to come up with CREATIVE IDEAS.
** ###We use Falcon 180B to
<br> - generate a <b>SUMMARY</b> of the arxiv articles (only August articles are supported)
<br> - generate a <b>CREATIVE IDEA </b> for future extension
<br> - generate a <b>CRAZY IDEA</b> for application in an orthogonal field.
This should hopefully CONNECT unrelated fields and inspire researchers to come up with CREATIVE IDEAS.
## Please input ARXIV ID or a query, see examples below (limited to 15K articles from August 2023)
➡️️ **Intended Use**: this demo is intended to showcase how LLMs can be used to generate creative ideas for future extension and application in orthogonal field.
⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.
"""
            )
    # Chat widget wired to `generate`; sampling controls come from
    # `additional_inputs` defined above.
    gr.ChatInterface(
        generate,
        examples=EXAMPLES,
        additional_inputs=additional_inputs,
    )
demo.queue(concurrency_count=100, api_open=False).launch(show_api=False)