File size: 7,989 Bytes
764ee8b
 
 
 
 
5a6c57a
 
764ee8b
7faf5ad
 
 
4f59140
9c45526
764ee8b
 
5a6c57a
402ce86
5a6c57a
e4251f1
 
 
 
5a6c57a
764ee8b
 
 
5a6c57a
764ee8b
 
 
 
 
5a6c57a
764ee8b
 
 
 
4d014d6
 
1c927ef
4d014d6
 
 
 
764ee8b
 
 
 
 
80c7823
764ee8b
 
4f01b22
 
402ce86
764ee8b
 
 
 
 
 
 
 
 
402ce86
764ee8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a6c57a
764ee8b
5a6c57a
764ee8b
 
402ce86
764ee8b
80c7823
764ee8b
7cc4c5c
 
 
402ce86
 
 
 
 
764ee8b
0a1b04e
402ce86
764ee8b
 
5a6c57a
402ce86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a6c57a
764ee8b
5a6c57a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import gradio as gr
import gc, copy, re
from rwkv.model import RWKV
from rwkv.utils import PIPELINE, PIPELINE_ARGS

# Maximum number of prompt tokens fed to the model: evaluate() keeps only the
# last `ctx_limit` tokens of the encoded prompt.
ctx_limit = 4096
# Checkpoint file name. It is passed to RWKV() as the model path and is also
# used as the page title / heading of the Gradio UI.
title = "RWKV-5-World-0.1B-v1-20230803-ctx4096.pth"

# Disabled one-off download of the checkpoint named by `title`:
# import urllib.request
# url = f"https://huggingface.co/BlinkDL/rwkv-5-world/resolve/main/{title}"
# urllib.request.urlretrieve(url, title)

# Load the checkpoint once, at import time, with the strategy string
# 'cpu bf16' (presumably CPU inference in bfloat16 -- confirm against the
# rwkv package documentation).
model = RWKV(model=title, strategy='cpu bf16')
# Helper bound to the model and the "rwkv_vocab_v20230424" vocabulary; the
# rest of this file uses it for encode(), decode() and sample_logits().
pipeline = PIPELINE(model, "rwkv_vocab_v20230424")

def generate_prompt(instruction, input=None, history=None):
    # parse the chat history into a string of user and assistant messages
    history_str = ""

    if history is not None:
        for pair in history:
            history_str += f"Instruction: {pair[0]}\n\nAssistant: {pair[1]}\n\n"

    instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n').replace('\n\n','\n')
    input = input.strip().replace('\r\n','\n').replace('\n\n','\n').replace('\n\n','\n')
    if input and len(input) > 0:
        return f"""{history_str}Instruction: {instruction}

Input: {input}

Response:"""
    else:
        return f"""{history_str}User: {instruction}

Assistant:"""

# Settings shared by every example row, in the column order used by the
# example Dataset: max tokens, temperature, top-p, presence penalty,
# count penalty.
_EXAMPLE_SETTINGS = [300, 1.2, 0.5, 0.5, 0.5]

# (instruction, input) text for each example row.
_EXAMPLE_PROMPTS = [
    ("東京で訪れるべき素晴らしい場所とその紹介をいくつか挙げてください。", ""),
    ("Écrivez un programme Python pour miner 1 Bitcoin, avec des commentaires.", ""),
    ("Write a song about ravens.", ""),
    ("Explain the following metaphor: Life is like cats.", ""),
    ("Write a story using the following information", "A man named Alex chops a tree down"),
    ("Generate a list of adjectives that describe a person as brave.", ""),
    ("You have $100, and your goal is to turn that into as much money as possible with AI and Machine Learning. Please respond with detailed plan.", ""),
]

# One row per example: [instruction, input, *shared settings].
examples = [
    [instruction_text, input_text, *_EXAMPLE_SETTINGS]
    for instruction_text, input_text in _EXAMPLE_PROMPTS
]

def evaluate(
    instruction,
    input=None,
    token_count=333,
    temperature=1.0,
    top_p=0.5,
    presencePenalty = 0.5,
    countPenalty = 0.5,
    history=None # add the history parameter to the evaluate function
):
    """Stream a model completion for ``instruction`` (plus optional ``input``).

    This is a generator. It yields the accumulated, stripped output text each
    time the newly sampled tokens decode to valid text, and yields the final
    text once more when generation ends.

    Generation ends when ``token_count`` tokens have been sampled, when stop
    token 0 is sampled, or when the output contains a blank line.

    Args:
        instruction: Instruction / user message. ``None`` is treated as "".
        input: Optional extra context. ``None`` is treated as "".
        token_count: Maximum number of tokens to sample.
        temperature: Sampling temperature; values below 0.2 are raised to 0.2.
        top_p: Nucleus-sampling threshold.
        presencePenalty: Flat logit penalty for any token already generated.
        countPenalty: Logit penalty scaled by the (decayed) occurrence count.
        history: Optional earlier turns, forwarded to generate_prompt().

    Yields:
        The text generated so far, stripped of surrounding whitespace.
    """
    args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p),
                     alpha_frequency = float(countPenalty),
                     alpha_presence = float(presencePenalty),
                     token_ban = [], # ban the generation of some tokens
                     token_stop = [0]) # stop generation whenever you see any token here

    # `input` defaults to None, so coerce missing values to "" before any
    # string handling. Normalise CRLF *before* collapsing newline runs,
    # otherwise "\r\n\r\n" slips through the collapse untouched.
    instruction = re.sub(r'\n{2,}', '\n', (instruction or '').replace('\r\n', '\n')).strip()
    input = re.sub(r'\n{2,}', '\n', (input or '').replace('\r\n', '\n')).strip()
    ctx = generate_prompt(instruction, input, history) # pass the history to the generate_prompt function
    print(ctx + "\n")

    all_tokens = []
    out_last = 0      # index in all_tokens of the first token not yet emitted
    out_str = ''
    occurrence = {}   # token id -> decayed count of how often it was sampled
    out = None        # pre-bound so cleanup is safe even if the loop never runs
    state = None
    # The first forward pass consumes the prompt, truncated to its last
    # ctx_limit tokens; every later pass consumes only the token just sampled.
    next_input = pipeline.encode(ctx)[-ctx_limit:]
    try:
        for _ in range(int(token_count)):
            out, state = model.forward(next_input, state)
            # Repetition penalty: lower the logits of already-generated tokens.
            for seen in occurrence:
                out[seen] -= (args.alpha_presence + occurrence[seen] * args.alpha_frequency)

            token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
            if token in args.token_stop:
                break
            all_tokens += [token]
            next_input = [token]
            # Decay old counts so the penalty on a token fades over time.
            for seen in occurrence:
                occurrence[seen] *= 0.996
            occurrence[token] = occurrence.get(token, 0) + 1

            # Only emit text once the pending tokens decode cleanly; U+FFFD
            # means a multi-byte character is still incomplete.
            tmp = pipeline.decode(all_tokens[out_last:])
            if '\ufffd' not in tmp:
                out_str += tmp
                yield out_str.strip()
                out_last = len(all_tokens)
            # A blank line marks the end of the answer in the prompt format.
            if '\n\n' in out_str:
                break
    finally:
        # Runs on normal completion and also when the consumer closes the
        # generator early, so logits and recurrent state are always released.
        out = None
        state = None
        gc.collect()
    yield out_str.strip()

def user(message, chatbot):
    """Append ``message`` to the chat as a new turn with no reply yet.

    Returns a pair: an empty string and a new list made of the existing turns
    (or none, if ``chatbot`` is empty/None) followed by ``[message, None]``.
    """
    existing_turns = chatbot if chatbot else []
    pending_turn = [message, None]
    return "", existing_turns + [pending_turn]

def alternative(chatbot, history):
    """Clear the last reply in ``chatbot`` and reset ``history[0]`` from ``history[1]``.

    If either argument is empty/None, both are returned unchanged. Otherwise
    both are mutated in place and returned: the reply slot of the last chat
    turn becomes ``None`` and ``history[0]`` becomes a deep copy of
    ``history[1]``.

    NOTE(review): this assumes ``history`` holds at least two entries -- a
    one-element history raises IndexError. Confirm against the caller.
    """
    if chatbot and history:
        last_turn = chatbot[-1]
        last_turn[1] = None
        snapshot = copy.deepcopy(history[1])
        history[0] = snapshot
    return chatbot, history


# Gradio UI with two tabs: a single-shot "Instruct mode" and a multi-turn
# "Chat mode". Both stream their output from evaluate().
with gr.Blocks(title=title) as demo:
    gr.HTML(f"<div style=\"text-align: center;\">\n<h1>🌍World - {title}</h1>\n</div>")

    with gr.Tab("Instruct mode"):
        gr.Markdown(f"100% RNN RWKV-LM **trained on 100+ natural languages**. Demo limited to ctxlen {ctx_limit}. For best results, <b>keep your prompt short and clear</b>.")
        with gr.Row():
            with gr.Column():
                instruction = gr.Textbox(lines=2, label="Instruction", value='東京で訪れるべき素晴らしい場所とその紹介をいくつか挙げてください。')
                input = gr.Textbox(lines=2, label="Input", placeholder="")
                token_count = gr.Slider(10, 512, label="Max Tokens", step=10, value=333)
                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.2)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.3)
                presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0)
                count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=0.7)
            with gr.Column():
                with gr.Row():
                    submit = gr.Button("Submit", variant="primary")
                    clear = gr.Button("Clear", variant="secondary")
                output = gr.Textbox(label="Output", lines=5)
        data = gr.Dataset(components=[instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty], samples=examples, label="Example Instructions", headers=["Instruction", "Input", "Max Tokens", "Temperature", "Top P", "Presence Penalty", "Count Penalty"])
        submit.click(evaluate, [instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty], [output])
        clear.click(lambda: None, [], [output])
        # Clicking an example row copies its values into the input widgets.
        data.click(lambda x: x, [data], [instruction, input, token_count, temperature, top_p, presence_penalty, count_penalty])

    with gr.Tab("Chat mode"):
        with gr.Row():
            chatbot = gr.Chatbot()
            with gr.Column():
                msg = gr.Textbox(scale=4, show_label=False, placeholder="Enter text and press enter", container=False)
                clear = gr.Button("Clear")
            with gr.Column():
                token_count = gr.Slider(10, 512, label="Max Tokens", step=10, value=333)
                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=1.2)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.3)
                presence_penalty = gr.Slider(0.0, 1.0, label="Presence Penalty", step=0.1, value=0)
                count_penalty = gr.Slider(0.0, 1.0, label="Count Penalty", step=0.1, value=0.7)

            def clear_chat():
                """Return the values that reset the message box and the chat."""
                return "", []

            def user_msg(message, history):
                """Clear the message box and append ``message`` as an unanswered turn."""
                history = history or []
                return "", history + [[message, None]]

            def chat(history, max_tokens, temp, nucleus_p, presence, count):
                """Stream the assistant's reply to the last, unanswered turn.

                The sampling settings arrive as event inputs, so they carry
                the sliders' *current* values. Reading ``slider.value`` inside
                the handler would only return the initial value, and assigning
                to a name that matches an outer slider variable would make it
                local and raise UnboundLocalError.

                Yields the updated list of turns after each streamed chunk.
                """
                turns = [list(turn) for turn in (history or [])]
                if not turns:
                    yield turns
                    return

                message = turns[-1][0]
                # Only completed turns are context; the pending turn is the
                # instruction itself.
                previous_turns = turns[:-1]
                # evaluate() is a generator: iterate it and publish each
                # partial answer instead of storing the generator object.
                for partial in evaluate(message, "", max_tokens, temp, nucleus_p, presence, count, previous_turns):
                    turns[-1][1] = partial
                    yield turns

            msg.submit(user_msg, [msg, chatbot], [msg, chatbot], queue=False).then(
                chat,
                [chatbot, token_count, temperature, top_p, presence_penalty, count_penalty],
                chatbot,
                api_name="chat",
            )
            # clear_chat returns two values, so it needs two output components.
            clear.click(clear_chat, None, [msg, chatbot], queue=False)

# Enable Gradio's request queue with max_size=10 (presumably the cap on
# waiting requests -- confirm against the Gradio docs) before launching.
demo.queue(max_size=10)
# Start the app; share=False means no public share link is requested.
demo.launch(share=False)