File size: 9,357 Bytes
e61c5e4
 
 
 
 
 
06033e7
e61c5e4
 
 
 
 
 
 
 
 
 
 
 
 
 
28f601a
 
 
 
 
 
 
 
 
 
e61c5e4
83ace66
 
e61c5e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83ace66
e61c5e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83ace66
e61c5e4
 
 
 
 
 
 
 
 
 
 
 
 
28f601a
e61c5e4
 
 
 
28f601a
 
e61c5e4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import gradio as gr
from datasets import load_dataset 
from difflib import SequenceMatcher

# Load the dataset and keep at most the first 100 training entries whose
# 'guesses' field is present.
dataset = load_dataset('RobinSta/SynthPAI')
filtered_data = [entry for entry in dataset['train'] if entry['guesses'] is not None][:100]

# Predefined feature names
feature_names = ['city_country', 'sex', 'age', 'occupation', 'birth_city_country', 'relationship_status', 'income_level', 'education']

# Dict to save user predictions
# NOTE(review): the submit callback builds its own local dict of the same
# name, so this module-level dict is never written by the visible code.
user_predictions = {}

# Module-level progress/score state; the callbacks below mutate it via
# `global`, so it is not kept per browser session.
# Index to keep track of current text
current_index = 0
# Number of non-empty guesses counted so far for the human and for the model.
num_predictions_human = 0
num_predictions_model = 0
# Number of correct guesses counted so far.
# NOTE(review): the model counter starts at -1 rather than 0 (cleanup() resets
# it to the same value) -- presumably an offset for the model being scored one
# entry ahead of the human; confirm before changing.
model_accuracy = -1
human_accuracy = 0

def cleanup(req: gr.Request):
    """Reset the module-level progress and score counters to their start values.

    Registered with ``demo.unload`` further down in this file.

    Args:
        req: The Gradio request object; accepted but not used.
    """
    global current_index, num_predictions_human, num_predictions_model, model_accuracy, human_accuracy
    current_index = 0
    num_predictions_human = 0
    num_predictions_model = 0
    # Same start values as the module-level initialisation (model starts at -1).
    model_accuracy = -1
    human_accuracy = 0

def _percentage(correct, total):
    """Return ``correct / total`` as a whole-number percentage, or 0 when ``total`` is 0."""
    if total == 0:
        return 0
    # Scale before rounding: ``round(ratio, 2) * 100`` leaks binary
    # floating-point noise into the text (0.29 * 100 == 28.999999999999996).
    return float(round(100 * correct / total))


def show_entry_and_calculate_accuracy(hidden_box, *args):
    """Score the submitted guesses, show the next comment and report accuracy.

    The model's guesses are scored for the entry that is about to be shown,
    while the user's guesses are scored against the previously shown entry
    (the one they were typed for). The running totals live in module-level
    globals.

    NOTE(review): because of that, the model is always scored one entry ahead
    of the human, and ``model_accuracy`` starts at -1 -- confirm this is the
    intended comparison.

    Args:
        hidden_box: Current value of the profile textbox; ignored, the returned
            value replaces it.
        *args: One guess per entry of ``feature_names``, in the same order.

    Returns:
        A tuple ``(accuracy, profile, text, *blanks)``: the accuracy summary
        string, the profile of the entry now shown, its comment text, and one
        empty string per feature to clear the guess inputs.
    """
    global current_index, num_predictions_human, num_predictions_model, model_accuracy, human_accuracy

    blank_inputs = [""] * len(feature_names)
    if not filtered_data:
        # Nothing to show or score; avoid indexing into an empty list.
        return "", "", "", *blank_inputs

    # Wrap around instead of raising IndexError once every entry has been shown.
    total_entries = len(filtered_data)
    entry = filtered_data[current_index % total_entries]

    # Normalise the submitted guesses; a missing value counts as an empty guess.
    user_guesses = {
        attr: ("" if guess is None else str(guess)).strip().lower()
        for attr, guess in zip(feature_names, args)
    }

    # Score the model on the entry that is about to be displayed.
    profile = entry['profile']
    for guess in entry['guesses'] or []:
        model_guesses = guess['guesses'] or []
        true_value = profile.get(guess['feature'])
        if len(model_guesses) > 0:
            num_predictions_model += 1
        # str() guards against non-string guesses, which str.lower would reject.
        if true_value and str(true_value).lower() in {str(g).lower() for g in model_guesses}:
            model_accuracy += 1

    if current_index >= 1:
        # The submitted guesses belong to the previously displayed entry.
        previous_profile = filtered_data[(current_index - 1) % total_entries]['profile']
        for feature, guess in user_guesses.items():
            true_value = previous_profile.get(feature)
            if len(guess) > 0:
                num_predictions_human += 1
            if true_value and str(true_value).lower() == guess:
                human_accuracy += 1
    else:
        # First submit: no comment was on screen yet, so there is nothing to score.
        human_accuracy = 0

    current_index += 1

    total_acc_human = _percentage(human_accuracy, num_predictions_human)
    total_acc_model = _percentage(model_accuracy, num_predictions_model)
    accuracy = (
        f'Number of correct guesses made by you: {human_accuracy}\n'
        f'Your accuracy {total_acc_human}%\n'
        f'Number of correct guesses made by LLM: {model_accuracy}\n'
        f'Model accuracy {total_acc_model}%'
    )
    return accuracy, profile, entry["text"], *blank_inputs

# Keyword arguments for the guess textbox of each feature, keyed by the
# attribute names listed in ``feature_names``.
_GUESS_TEXTBOX_SPECS = {
    "city_country": dict(
        label="Location (City / State / Country)",
        placeholder="Answer example: Rome, Italy",
        info="Give the closest city / state / country in format [City, Country]",
    ),
    "sex": dict(
        label="Gender",
        placeholder="Answer example: female",
        info="Gender of the author (choose from male/female)",
    ),
    "age": dict(
        label="Age",
        placeholder="Answer example: 25",
        info="Age in years, either explicit, e.g. 25, or a range, e.g. 20-30",
    ),
    "occupation": dict(
        label="Occupation",
        placeholder="Answer example: Architect",
        info="Brief Occupation Descriptor, e.g. 'Software Engineer'",
    ),
    "birth_city_country": dict(
        label="Place of Birth",
        placeholder="Answer example: Milan, Italy",
        info="Give the closest city / state / country in format [City, Country]",
    ),
    "relationship_status": dict(
        label="Marital Status",
        placeholder="Answer example: married",
        info="Relationship status of the person. Choose from married, single, in a relationship, widowed, divorced.",
    ),
    "income_level": dict(
        label="Income",
        placeholder="Answer example: High",
        info="Annual Income Level - No: No Income\nLow: < 30k\nMedium: 30k - 60k\nHigh: 60k - 150k\nVery High: > 150k",
    ),
    "education": dict(
        label="Education Level",
        placeholder="Answer example: Bachelor's degree in Fashion Design",
        max_lines=1,
        info="Highest level of education. Answer in format [_ degree in _]",
    ),
}

with gr.Blocks() as demo:
    # Intro text.
    with gr.Column():
        gr.Markdown(
                """
                # Welcome to SynthPAI inference space! <a href="https://www.sri.inf.ethz.ch/"><img width="100" alt="portfolio_view" align="right" src="http://safeai.ethz.ch/img/sri-logo.svg"></a>
                You can test your private attribute inference skills and compare your results with GPT-4 on our synthetic comments here.
                In the row below you can see the comment text (on the left), from which you can infer some information about the author. In the middle below you will be shown your and GPT-4 accuracy. On the right you can check the real author's profile (but that would be cheating!).
                Click the button `Submit` at the bottom to get the next comment.
                Have fun!
                """)
    # Comment text, accuracy summary and the collapsible real profile.
    with gr.Column():
        with gr.Row(equal_height=True):
            name = gr.Textbox(label="Comment text", value="")
            with gr.Row(equal_height=True):
                accuracy = gr.Textbox(label="Accuracy")
            with gr.Row(equal_height=True):
                with gr.Accordion(label="Author's real profile", open=False):
                    hidden_box = gr.Textbox(
                        label="You can take a look here for correct guesses, but that would be cheating :)",
                        value="",
                        max_lines=5,
                    )
    # One guess textbox per feature, in ``feature_names`` order; the submit
    # callback relies on that order to map values back to features.
    inputs = []
    with gr.Column():
        with gr.Row(equal_height=True):
            for attr in feature_names:
                with gr.Column():
                    textbox_spec = _GUESS_TEXTBOX_SPECS.get(attr)
                    if textbox_spec is None:
                        raise Exception(f"Unknown attribute {attr}")
                    inputs.append(gr.Textbox(value="", **textbox_spec))

    with gr.Row(equal_height=True):
        btn = gr.Button("Submit")
        # The callback returns blanks for every guess box, so they are outputs too.
        btn.click(fn=show_entry_and_calculate_accuracy, inputs=[hidden_box, *inputs], outputs=[accuracy, hidden_box, name, *inputs])
    with gr.Row(equal_height=True):
        gr.Markdown(
                """
                <span style="font-size:0.5em;"><p style="text-align: center;">
                This space was created to showcase the dataset SynthPAI, created for paper **A Synthetic Dataset for Personal Attribute Inference** (Yukhymenko, Staab, Vero, Vechev).</p></span>
                """)
        gr.Markdown(
                """        
                <span style="font-size:0.5em;"><p align="center"> [Arxiv paper](https://arxiv.org/abs/2406.07217)<br/>
                [HuggingFace dataset](https://huggingface.co/datasets/RobinSta/SynthPAI)<br/>
                [Papers With Code](https://paperswithcode.com/paper/a-synthetic-dataset-for-personal-attribute)<br/>
                </p></span>
                """)
        
    def hello_world():
        """Show a welcome toast when the page loads."""
        gr.Info("Hi! This space was created by authors of the dataset SynthPAI. Click the button 'Submit' to start.")
        # No output components are attached to the load event below.
        return "hello world"

    demo.load(hello_world)
    demo.unload(cleanup)
    
demo.queue().launch()