Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -151,8 +151,10 @@ def prepare_training_data(qa_pairs, tokenizer):
|
|
151 |
# Here, we assume the context is a single long string.
|
152 |
context = "your_bhagavad_gita_text_here" # Replace with your preprocessed Bhagavad Gita text
|
153 |
context_encoded = tokenizer(context, add_special_tokens=True, return_tensors="pt")
|
154 |
-
start_positions = answer_encoded.input_ids == tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0] # Find first SEP token
|
155 |
-
|
|
|
|
|
156 |
|
157 |
# Combine all data into a dictionary for each QA pair
|
158 |
encoded_data.append({
|
|
|
151 |
# Here, we assume the context is a single long string.
|
152 |
context = "your_bhagavad_gita_text_here" # Replace with your preprocessed Bhagavad Gita text
|
153 |
context_encoded = tokenizer(context, add_special_tokens=True, return_tensors="pt")
|
154 |
+
# start_positions = answer_encoded.input_ids == tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0] # Find first SEP token
|
155 |
+
start_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0]]
|
156 |
+
|
157 |
+
end_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.eos_token)[0]] # Find first EOS token
|
158 |
|
159 |
# Combine all data into a dictionary for each QA pair
|
160 |
encoded_data.append({
|