erikjm committed on
Commit
97ca63a
·
verified ·
1 Parent(s): cd65b86

Upload 4 files

Browse files
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from interface_utils import *
4
+
5
# Name of the maxim written into every saved label file (see save_labels).
maxim = 'quality'

# Statement shown as the radio label, and its answer options.  The two lists
# are indexed in parallel: checkbox_choices[k] belongs to submaxims[k].
submaxims = [
    "The response is factual and supported by adequate evidence whenever possible.",
]
checkbox_choices = [["Yes", "No", "NA"]]

# Conversations to annotate.  The longest transcript fixes how many Markdown
# slots the interface allocates; one conversation is picked for first display.
conversation_data = load_from_jsonl('./data/conversations_unlabeled.jsonl')
max_conversation_length = max(len(entry['transcript']) for entry in conversation_data)
conversation = get_conversation(conversation_data)
14
+
15
+
16
def save_labels(conv_id, skipped, submaxim_0=None):
    """Write the annotation for one conversation to ``./labels`` as JSON.

    The file is named ``<maxim>_human_labels_<conv_id>.json`` and is
    overwritten if it already exists.

    Args:
        conv_id: Identifier of the conversation being labelled.
        skipped: Whether the annotator skipped this conversation.
        submaxim_0: Answer selected for the first submaxim, or ``None`` when
            nothing was selected (always ``None`` for skipped items).
    """
    # Imported here because this module otherwise only sees ``json`` as a
    # side effect of ``from interface_utils import *``.
    import json

    data = {
        'conv_id': conv_id,
        'maxim': maxim,
        'skipped': skipped,
        'submaxim_0': submaxim_0,
    }
    os.makedirs("./labels", exist_ok=True)

    # The id comes back from a UI component, so keep only its final path
    # component: a crafted value must not be able to write outside ./labels.
    safe_id = os.path.basename(str(conv_id))
    target = os.path.join("./labels", f"{maxim}_human_labels_{safe_id}.json")

    with open(target, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)
27
+
28
+
29
def update_interface(new_conversation):
    """Build the component values that display ``new_conversation``.

    Args:
        new_conversation: Dict with a ``'conv_id'`` and a ``'transcript'``
            (a list of dicts with ``'speaker'`` and ``'response'`` keys).

    Returns:
        A flat list in the order the click handlers declare their outputs:
        the conversation id, ``max_conversation_length`` Markdown components
        (hidden for unused slots), the textbox holding the response to
        label, a reset radio group, and a hidden number with the count of
        real turns.
    """
    new_conv_id = new_conversation['conv_id']

    # Work on a copy with trailing padding entries removed, so the turn count
    # is right even if the stored transcript was padded in place earlier, and
    # so padding done here never leaks back into the shared data.
    turns = list(new_conversation['transcript'])
    while turns and turns[-1] == {'speaker': '', 'response': ''}:
        turns.pop()
    real_turn_count = len(turns)
    new_transcript = pad_transcript(turns, max_conversation_length)

    markdown_blocks = []
    for turn in new_transcript[:max_conversation_length]:
        if turn['speaker'] != '':
            # Same markup as the initial render; HTML entities are used for
            # the indentation because Markdown collapses plain spaces.
            block = gr.Markdown(
                f"""&nbsp;&nbsp;**{turn['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{turn['response']}""",
                visible=True)
        else:
            block = gr.Markdown("", visible=False)
        markdown_blocks.append(block)

    new_last_response = gr.Text(value=get_last_response(new_transcript),
                                label="",
                                lines=1,
                                container=False,
                                interactive=False,
                                autoscroll=True,
                                visible=True)
    # value=None clears the previous answer for the next conversation.
    new_radio_0_base = gr.Radio(label=submaxims[0],
                                choices=checkbox_choices[0],
                                value=None,
                                visible=True)
    conv_len = gr.Number(value=real_turn_count, visible=False)

    return [new_conv_id] + markdown_blocks + [new_last_response] + [new_radio_0_base] + [conv_len]
55
+
56
+
57
def submit(*args):
    """Store the selected answer and move on to another conversation.

    ``args`` carries the handler's input values positionally: the first one
    is the conversation id and the second-to-last is the radio selection.
    Returns the list produced by ``update_interface`` for the next
    conversation.
    """
    current_id, chosen_answer = args[0], args[-2]
    save_labels(current_id, skipped=False, submaxim_0=chosen_answer)
    return update_interface(get_conversation(conversation_data))
65
+
66
+
67
def skip(*args):
    """Record the current conversation as skipped and show another one.

    Only the first positional value (the conversation id) is used. Returns
    the list produced by ``update_interface`` for the next conversation.
    """
    current_id = args[0]
    save_labels(current_id, skipped=True)
    return update_interface(get_conversation(conversation_data))
73
+
74
+
75
# Build the labelling UI for the conversation picked at import time: a header,
# one Markdown slot per possible transcript turn, the response to label, the
# radio question, and Submit/Skip buttons wired to submit()/skip().
# NOTE(review): this rendering has lost indentation, so how the layout
# contexts below nest inside each other cannot be read off here; confirm
# against the raw file before restructuring.
+ with gr.Blocks(theme=gr.themes.Default()) as interface:
76
+ conv_id = conversation['conv_id']
77
+ transcript = conversation['transcript']
78
# Hidden field holding the transcript length, taken before the padding below.
+ conv_len = gr.Number(value=len(transcript), visible=False)
79
+ padded_transcript = pad_transcript(transcript, max_conversation_length)
80
+
81
+ markdown_blocks = [None] * max_conversation_length
82
+ with gr.Column(scale=1, min_width=600):
83
+ with gr.Group():
84
+ gr.Markdown("""<span style='font-size: 16px;'>&nbsp;&nbsp;&nbsp;&nbsp;**Conversational context** </span>""",
85
+ visible=True)
86
# One Markdown component per slot, up to the longest transcript; slots at or
# past conv_len are hidden.
+ for i in range(max_conversation_length):
87
+ markdown_blocks[i] = gr.Markdown(f"""&nbsp;&nbsp;**{padded_transcript[i]['speaker']}**: &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{padded_transcript[i]['response']}""")
88
+ if i >= conv_len.value:
89
+ markdown_blocks[i].visible = False
90
+
91
+ with gr.Row():
92
+ with gr.Group(elem_classes="bottom-aligned-group"):
93
+ speaker_adapted = gr.Markdown(
94
+ f"""<span style='font-size: 16px;'>&nbsp;&nbsp;&nbsp;&nbsp;**Response to label** </span>""",
95
+ visible=True)
96
+ last_response = gr.Textbox(value=get_last_response(transcript),
97
+ label="",
98
+ lines=1,
99
+ container=False,
100
+ interactive=False,
101
+ autoscroll=True,
102
+ visible=True)
103
+ radio_submaxim_0_base = gr.Radio(label=submaxims[0],
104
+ choices=checkbox_choices[0],
105
+ value=None,
106
+ visible=True)
107
+
108
+ submit_button = gr.Button("Submit")
109
+ skip_button = gr.Button("Skip")
110
+
111
+ conv_id_element = gr.Text(value=conv_id, visible=False)
112
# Handlers receive every value positionally: the conversation id comes first
# and the radio answer second to last (submit() reads args[0] and args[-2]).
+ input_list = [conv_id_element] + \
113
+ markdown_blocks + \
114
+ [last_response] + \
115
+ [radio_submaxim_0_base] + \
116
+ [conv_len]
117
# Both buttons refresh the same components, listed in the order in which
# update_interface() returns their new values.
+ submit_button.click(
118
+ fn=submit,
119
+ inputs=input_list,
120
+ outputs=[conv_id_element,
121
+ *markdown_blocks,
122
+ last_response,
123
+ radio_submaxim_0_base,
124
+ conv_len]
125
+ )
126
+ skip_button.click(
127
+ fn=skip,
128
+ inputs=input_list,
129
+ outputs=[conv_id_element,
130
+ *markdown_blocks,
131
+ last_response,
132
+ radio_submaxim_0_base,
133
+ conv_len]
134
+ )
135
+
136
# Custom stylesheet for the page. The .bottom-aligned-group rule targets the
# gr.Group created with elem_classes="bottom-aligned-group".
# NOTE(review): no component visible in this file sets elem_id="textbox_id",
# so the first rule looks unused; confirm.
+ css = """
137
+ #textbox_id textarea {
138
+ background-color: white;
139
+ }
140
+
141
+ .bottom-aligned-group {
142
+ display: flex;
143
+ flex-direction: column;
144
+ justify-content: flex-end;
145
+ height: 100%;
146
+ }
147
+ """
148
# The stylesheet is assigned to the Blocks object after it has been built,
# then the app is started.
+ interface.css = css
149
+ interface.launch()
150
+
151
+
data/conversations_unlabeled.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
interface_utils.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+ import random
4
+ import uuid
5
+
6
+
7
def load_from_jsonl(filename, n=np.inf):
    """Read up to ``n`` JSON records from a JSON Lines file.

    Blank or whitespace-only lines (for example a trailing empty line) are
    skipped instead of being handed to the JSON parser.

    Args:
        filename: Path of the ``.jsonl`` file to read.
        n: Maximum number of records to return; unlimited by default.

    Returns:
        A list with one decoded object per non-empty line, in file order.

    Raises:
        json.JSONDecodeError: If a non-empty line is not valid JSON.
    """
    data = []
    # JSON text is UTF-8 by specification; do not depend on the locale.
    with open(filename, 'r', encoding='utf-8') as file:
        for line in file:
            if len(data) >= n:  # stop once n records have been collected
                break
            line = line.strip()
            if not line:
                continue
            data.append(json.loads(line))
    return data
15
+
16
+
17
def append_id(conversations_no_id):
    """Return new conversation dicts, each tagged with a random hex id.

    Every output dict holds only ``'conv_id'`` (a fresh ``uuid4`` in hex
    form) and the input's ``'transcript'``; other input keys are dropped.
    """
    return [
        {'conv_id': uuid.uuid4().hex, 'transcript': entry['transcript']}
        for entry in conversations_no_id
    ]
25
+
26
+
27
def save_to_jsonl(data, filename):
    """Write ``data`` to ``filename`` as JSON Lines, one item per line.

    The file is created or truncated; each item is serialised with
    ``json.dumps`` and followed by a newline.
    """
    with open(filename, 'w') as out:
        out.writelines(json.dumps(record) + '\n' for record in data)
32
+
33
+
34
def get_conversation(conversation_data):
    """Return one element of ``conversation_data`` chosen with ``random.choice``."""
    return random.choice(conversation_data)
37
+
38
+
39
def pad_transcript(transcript, max_length):
    """Return a copy of ``transcript`` padded to ``max_length`` turns.

    Padding turns are dicts with an empty ``'speaker'`` and ``'response'``.
    The input list is never modified, so padding added for display cannot
    accumulate in the caller's data or change its length.

    Args:
        transcript: List of turn dicts.
        max_length: Minimum length of the returned list.

    Returns:
        A new list: the original turns followed by as many empty turns as
        needed. Transcripts already at or above ``max_length`` come back
        unpadded (still as a new list).
    """
    padding_count = max(max_length - len(transcript), 0)
    # A fresh dict per slot, so editing one padding turn never affects another.
    filler = [{'speaker': '', 'response': ''} for _ in range(padding_count)]
    return list(transcript) + filler
45
+
46
+
47
def get_last_response(transcript):
    """Return the response of the last turn that has a speaker and a response.

    Turns whose ``'speaker'`` or ``'response'`` is empty are ignored.
    Returns ``None`` when no turn qualifies.
    """
    candidates = (
        entry['response']
        for entry in reversed(transcript)
        if entry['speaker'] and entry['response']
    )
    return next(candidates, None)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ numpy