Modfiededition committed on
Commit
39ab779
·
1 Parent(s): 88924af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -80
app.py CHANGED
@@ -38,86 +38,90 @@ dataset = Dataset.from_dict(python_dict)
38
  MAX_LENGTH = 105
39
 
40
  button = st.button('Click here to extract the word/phrase from the text with the given sentiment: {0}..'.format(option))
41
- if button:
42
- with st.spinner('In progress.......'):
43
-
44
- def process_data(examples):
45
- questions = examples["sentiment"]
46
- context = examples["text"]
47
- inputs = tokenizer(
48
- questions,
49
- context,
50
- max_length = MAX_LENGTH,
51
- padding="max_length",
52
- return_offsets_mapping = True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  )
54
- # Assigning None values to all offset mapping of tokens which are not the context tokens.
55
- for i in range(len(inputs["input_ids"])):
56
- offset = inputs["offset_mapping"][i]
57
- sequence_ids = inputs.sequence_ids(i)
58
- inputs["offset_mapping"][i] = [
59
- o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
60
- ]
61
- return inputs
62
-
63
- processed_raw_data = dataset.map(
64
- process_data,
65
- batched = True
66
- )
67
- tf_raw_dataset = processed_raw_data.to_tf_dataset(
68
- columns=["input_ids", "attention_mask"],
69
- shuffle=False,
70
- batch_size=1,
71
- )
72
-
73
- # final predictions.
74
- outputs = model.predict(tf_raw_dataset)
75
- start_logits = outputs.start_logits
76
- end_logits = outputs.end_logits
77
-
78
- # Post Processing.
79
- # Using start_logits and end_logits to generate the final answer from the given context.
80
- n_best = 20
81
-
82
- def predict_answers(inputs):
83
- predicted_answer = []
84
- for i in range(len(inputs["offset_mapping"])):
85
- start_logit = inputs["start_logits"][i]
86
- end_logit = inputs["end_logits"][i]
87
- context = inputs["text"][i]
88
- offset = inputs["offset_mapping"][i]
89
- start_indexes = np.argsort(start_logit)[-1: -n_best - 1:-1].tolist()
90
- end_indexes = np.argsort(end_logit)[-1: -n_best - 1: -1].tolist()
91
-
92
- flag = False
93
- for start_index in start_indexes:
94
- for end_index in end_indexes:
95
- # skip answer that are not in the context.
96
- if offset[start_index] is None or offset[end_index] is None:
97
- continue
98
- # skip answer with length that is either < 0
99
- if end_index < start_index:
100
- continue
101
- flag = True
102
- answer = context[offset[start_index][0]: offset[end_index][1]]
103
  predicted_answer.append(answer)
104
- break
105
- if flag:
106
- break
107
- if not flag:
108
- predicted_answer.append(answer)
109
- return {"predicted_answer":predicted_answer}
110
-
111
- processed_raw_data.set_format("pandas")
 
 
 
112
 
113
- processed_raw_df = processed_raw_data[:]
114
- processed_raw_df["start_logits"] = start_logits.tolist()
115
- processed_raw_df["end_logits"] = end_logits.tolist()
116
- processed_raw_df["text"] = python_dict["text"]
117
-
118
- final_data = Dataset.from_pandas(processed_raw_df)
119
- final_data = final_data.map(predict_answers,batched=True)
120
-
121
-
122
-
123
- st.markdown("## " +final_data["predicted_answer"][0])
 
38
  MAX_LENGTH = 105
39
 
40
  button = st.button('Click here to extract the word/phrase from the text with the given sentiment: {0}..'.format(option))
41
+
42
+ if not textbox:
43
+ st.markdown("## " + "Please write your text above!")
44
+ else:
45
+ if button:
46
+ with st.spinner('In progress.......'):
47
+
48
+ def process_data(examples):
49
+ questions = examples["sentiment"]
50
+ context = examples["text"]
51
+ inputs = tokenizer(
52
+ questions,
53
+ context,
54
+ max_length = MAX_LENGTH,
55
+ padding="max_length",
56
+ return_offsets_mapping = True,
57
+ )
58
+ # Assigning None values to all offset mapping of tokens which are not the context tokens.
59
+ for i in range(len(inputs["input_ids"])):
60
+ offset = inputs["offset_mapping"][i]
61
+ sequence_ids = inputs.sequence_ids(i)
62
+ inputs["offset_mapping"][i] = [
63
+ o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
64
+ ]
65
+ return inputs
66
+
67
+ processed_raw_data = dataset.map(
68
+ process_data,
69
+ batched = True
70
  )
71
+ tf_raw_dataset = processed_raw_data.to_tf_dataset(
72
+ columns=["input_ids", "attention_mask"],
73
+ shuffle=False,
74
+ batch_size=1,
75
+ )
76
+
77
+ # final predictions.
78
+ outputs = model.predict(tf_raw_dataset)
79
+ start_logits = outputs.start_logits
80
+ end_logits = outputs.end_logits
81
+
82
+ # Post Processing.
83
+ # Using start_logits and end_logits to generate the final answer from the given context.
84
+ n_best = 20
85
+
86
+ def predict_answers(inputs):
87
+ predicted_answer = []
88
+ for i in range(len(inputs["offset_mapping"])):
89
+ start_logit = inputs["start_logits"][i]
90
+ end_logit = inputs["end_logits"][i]
91
+ context = inputs["text"][i]
92
+ offset = inputs["offset_mapping"][i]
93
+ start_indexes = np.argsort(start_logit)[-1: -n_best - 1:-1].tolist()
94
+ end_indexes = np.argsort(end_logit)[-1: -n_best - 1: -1].tolist()
95
+
96
+ flag = False
97
+ for start_index in start_indexes:
98
+ for end_index in end_indexes:
99
+ # skip answers that are not in the context.
100
+ if offset[start_index] is None or offset[end_index] is None:
101
+ continue
102
+ # skip answers with a negative length (end index before start index).
103
+ if end_index < start_index:
104
+ continue
105
+ flag = True
106
+ answer = context[offset[start_index][0]: offset[end_index][1]]
107
+ predicted_answer.append(answer)
108
+ break
109
+ if flag:
110
+ break
111
+ if not flag:
 
 
 
 
 
 
 
 
112
  predicted_answer.append(answer)
113
+ return {"predicted_answer":predicted_answer}
114
+
115
+ processed_raw_data.set_format("pandas")
116
+
117
+ processed_raw_df = processed_raw_data[:]
118
+ processed_raw_df["start_logits"] = start_logits.tolist()
119
+ processed_raw_df["end_logits"] = end_logits.tolist()
120
+ processed_raw_df["text"] = python_dict["text"]
121
+
122
+ final_data = Dataset.from_pandas(processed_raw_df)
123
+ final_data = final_data.map(predict_answers,batched=True)
124
 
125
+
126
+
127
+ st.markdown("## " +final_data["predicted_answer"][0])