marcel1997 committed on
Commit
8183dbf
·
verified ·
1 Parent(s): 5ae56e7

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -50
README.md CHANGED
@@ -99,7 +99,7 @@ formats = {
99
  "sensitivity": """<|im_start|>system\nSensitivity<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n""",
100
  "complexity": """<|im_start|>system\nComplexity<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n""",
101
  "entity_detection": """<|im_start|>system\nEntity Detection<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n""",
102
- "entity_swapping": """<|im_start|>system\nEntity Swapping<|im_end|>\n<|im_start|>user\nentities:{entities}\ntext:\n{text}<|im_end|>\n<|im_start|>assistant\n"""
103
  }
104
 
105
  def model_inference(text, mode="anonymization", max_new_tokens=2028, config=None, entity_mapping=None, return_entities=False, reverse_mapping=False):
@@ -288,26 +288,26 @@ def select_entities_based_on_json(prediction_json, entity_json):
288
 
289
  Example text
290
  ```
291
- We have a community picnic this Saturday at Greenfield Park, 11 AM. RSVP by Thursday, Write me an e-mail annoucment!
292
  ```
293
 
294
  The sensitivity analysis feature evaluates the sensitivity of a given text, and the complexity feature rates its complexity.
295
  ```python
296
- text = "We have a community picnic this Saturday at Greenfield Park, 11 AM. RSVP by Thursday, Write me an e-mail annoucment!"
297
 
298
  # Generate sensitivity score
299
  sensitivity_score = model_inference(text, mode="sensitivity")
300
- print(f"Sensitivity Score: {sensitivity_score}" "\n")
301
 
302
  # Generate complexity score
303
  complexity_score = model_inference(text, mode="complexity")
304
- print(f"Complexity: {complexity_score}" "\n")
305
  ```
306
 
307
  Output
308
  ```
309
  Sensitivity Score: 0
310
- Complexity Score: 3
311
  ```
312
 
313
  ### 3. Anonymization and Re-Anonymization
@@ -325,58 +325,20 @@ print(f"Anonymized Text: {anonymized_text}\n")
325
  # Restore the original text
326
  anonymized_text, entity_mapping = model_inference(text, mode="anonymization", return_entities=True)
327
  print(f"Entity Mapping:\n{entity_mapping}\n")
328
- print(f"Anonymized Text: {anonymized_text}\n")
329
  restored_text = model_inference(anonymized_text, mode="entity_swapping", entity_mapping=entity_mapping, reverse_mapping=True)
330
  print(f"Restored Text: {restored_text}")
331
  ```
332
 
333
  Output
334
  ```
335
- Anonymized Text: We have a community picnic this Saturday at Maplewood Park, Late Morning. RSVP by Thursday, Write me an e-mail announcement.
336
 
337
- Restored Text: We have a community picnic this Saturday at Greenfield Park, 11 AM. RSVP by Thursday, Write me an e-mail announcement.
338
- ```
 
 
339
 
340
- This is how the stored entitiy maps looks like
341
- ```python
342
- print(f"{json.dumps(entity_mapping, indent=4)}")
343
- ```
344
- Output
345
- ```
346
- {
347
- "Greenfield Park": {
348
- "TYPE": "LOC",
349
- "RANDOM": "Maplewood Park",
350
- "GENERAL": [
351
- [
352
- "Local Park",
353
- "3"
354
- ],
355
- [
356
- "Public Park",
357
- "5"
358
- ],
359
- [
360
- "Recreational Area",
361
- "7"
362
- ]
363
- ]
364
- },
365
- "11 AM": {
366
- "TYPE": "DATETIME",
367
- "RANDOM": "1 PM",
368
- "GENERAL": [
369
- [
370
- "Late Morning",
371
- "2"
372
- ],
373
- [
374
- "A",
375
- "4"
376
- ]
377
- ]
378
- }
379
- }
380
  ```
381
 
382
  Normally you would process the anonymized version with an LLM and then de-anonymize the result to restore the original entities.
 
99
  "sensitivity": """<|im_start|>system\nSensitivity<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n""",
100
  "complexity": """<|im_start|>system\nComplexity<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n""",
101
  "entity_detection": """<|im_start|>system\nEntity Detection<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n""",
102
+ "entity_swapping": """<|im_start|>system\nEntity Swapping<|im_end|>\n<|im_start|>user\nentities:\n{entities}\ntext:\n{text}<|im_end|>\n<|im_start|>assistant\n"""
103
  }
104
 
105
  def model_inference(text, mode="anonymization", max_new_tokens=2028, config=None, entity_mapping=None, return_entities=False, reverse_mapping=False):
 
288
 
289
  Example text
290
  ```
291
+ We have a community picnic at Greenfield Park, it is on thursday at 11 AM. Write me an e-mail annoucment!
292
  ```
293
 
294
  The sensitivity analysis feature evaluates the sensitivity of a given text, and the complexity feature rates its complexity.
295
  ```python
296
+ text = "We have a community picnic at Greenfield Park, it is on thursday at 11 AM. Write me an e-mail annoucment!"
297
 
298
  # Generate sensitivity score
299
  sensitivity_score = model_inference(text, mode="sensitivity")
300
+ print(f"Sensitivity Score: {sensitivity_score}")
301
 
302
  # Generate complexity score
303
  complexity_score = model_inference(text, mode="complexity")
304
+ print(f"Complexity: {complexity_score}")
305
  ```
306
 
307
  Output
308
  ```
309
  Sensitivity Score: 0
310
+ Complexity: 3
311
  ```
312
 
313
  ### 3. Anonymization and Re-Anonymization
 
325
  # Restore the original text
326
  anonymized_text, entity_mapping = model_inference(text, mode="anonymization", return_entities=True)
327
  print(f"Entity Mapping:\n{entity_mapping}\n")
 
328
  restored_text = model_inference(anonymized_text, mode="entity_swapping", entity_mapping=entity_mapping, reverse_mapping=True)
329
  print(f"Restored Text: {restored_text}")
330
  ```
331
 
332
  Output
333
  ```
334
+ Anonymized Text: We have a community picnic at Sunnyvale Park, it is on A Day of the Week at Morning. Write me an e-mail announcement!
335
 
336
+ Entity Mapping:
337
+ Greenfield Park : Sunnyvale Park
338
+ thursday : A Day of the Week
339
+ 11 AM : Morning
340
 
341
+ Restored Text: We have a community picnic at Greenfield Park, it is on thursday at 11 AM. Write me an e-mail announcement!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  ```
343
 
344
  Normally you would process the anonymized version with an LLM and then de-anonymize the result to restore the original entities.