Spaces:
Runtime error
Runtime error
# Copyright 2020 The HuggingFace Team. All rights reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import unittest | |
from transformers import ( | |
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING, | |
AutoModelForTableQuestionAnswering, | |
AutoTokenizer, | |
TableQuestionAnsweringPipeline, | |
TFAutoModelForTableQuestionAnswering, | |
is_torch_available, | |
pipeline, | |
) | |
from transformers.testing_utils import ( | |
is_pipeline_test, | |
require_pandas, | |
require_tensorflow_probability, | |
require_tf, | |
require_torch, | |
slow, | |
) | |
if is_torch_available(): | |
from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_12 | |
else: | |
is_torch_greater_or_equal_than_1_12 = False | |
class TQAPipelineTests(unittest.TestCase): | |
# Putting it there for consistency, but TQA do not have fast tokenizer | |
# which are needed to generate automatic tests | |
model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING | |
def test_small_model_tf(self): | |
model_id = "lysandre/tiny-tapas-random-wtq" | |
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True) | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
self.assertIsInstance(model.config.aggregation_labels, dict) | |
self.assertIsInstance(model.config.no_aggregation_label_index, int) | |
table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) | |
outputs = table_querier( | |
table={ | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
query="how many movies has george clooney played in?", | |
) | |
self.assertEqual( | |
outputs, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
) | |
outputs = table_querier( | |
table={ | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"], | |
) | |
self.assertEqual( | |
outputs, | |
[ | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
], | |
) | |
outputs = table_querier( | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
query=[ | |
"What repository has the largest number of stars?", | |
"Given that the numbers of stars defines if a repository is active, what repository is the most" | |
" active?", | |
"What is the number of repositories?", | |
"What is the average number of stars?", | |
"What is the total amount of stars?", | |
], | |
) | |
self.assertEqual( | |
outputs, | |
[ | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
], | |
) | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table=None) | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table="") | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table={}) | |
with self.assertRaises(ValueError): | |
table_querier( | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
} | |
) | |
with self.assertRaises(ValueError): | |
table_querier( | |
query="", | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
) | |
with self.assertRaises(ValueError): | |
table_querier( | |
query=None, | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
) | |
def test_small_model_pt(self): | |
model_id = "lysandre/tiny-tapas-random-wtq" | |
model = AutoModelForTableQuestionAnswering.from_pretrained(model_id) | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
self.assertIsInstance(model.config.aggregation_labels, dict) | |
self.assertIsInstance(model.config.no_aggregation_label_index, int) | |
table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) | |
outputs = table_querier( | |
table={ | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
query="how many movies has george clooney played in?", | |
) | |
self.assertEqual( | |
outputs, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
) | |
outputs = table_querier( | |
table={ | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"], | |
) | |
self.assertEqual( | |
outputs, | |
[ | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
], | |
) | |
outputs = table_querier( | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
query=[ | |
"What repository has the largest number of stars?", | |
"Given that the numbers of stars defines if a repository is active, what repository is the most" | |
" active?", | |
"What is the number of repositories?", | |
"What is the average number of stars?", | |
"What is the total amount of stars?", | |
], | |
) | |
self.assertEqual( | |
outputs, | |
[ | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
{"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, | |
], | |
) | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table=None) | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table="") | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table={}) | |
with self.assertRaises(ValueError): | |
table_querier( | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
} | |
) | |
with self.assertRaises(ValueError): | |
table_querier( | |
query="", | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
) | |
with self.assertRaises(ValueError): | |
table_querier( | |
query=None, | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
) | |
def test_slow_tokenizer_sqa_pt(self): | |
model_id = "lysandre/tiny-tapas-random-sqa" | |
model = AutoModelForTableQuestionAnswering.from_pretrained(model_id) | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) | |
inputs = { | |
"table": { | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
"query": ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"], | |
} | |
sequential_outputs = table_querier(**inputs, sequential=True) | |
batch_outputs = table_querier(**inputs, sequential=False) | |
self.assertEqual(len(sequential_outputs), 3) | |
self.assertEqual(len(batch_outputs), 3) | |
self.assertEqual(sequential_outputs[0], batch_outputs[0]) | |
self.assertNotEqual(sequential_outputs[1], batch_outputs[1]) | |
# self.assertNotEqual(sequential_outputs[2], batch_outputs[2]) | |
table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) | |
outputs = table_querier( | |
table={ | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
query="how many movies has george clooney played in?", | |
) | |
self.assertEqual( | |
outputs, | |
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, | |
) | |
outputs = table_querier( | |
table={ | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"], | |
) | |
self.assertEqual( | |
outputs, | |
[ | |
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, | |
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, | |
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, | |
], | |
) | |
outputs = table_querier( | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
query=[ | |
"What repository has the largest number of stars?", | |
"Given that the numbers of stars defines if a repository is active, what repository is the most" | |
" active?", | |
"What is the number of repositories?", | |
"What is the average number of stars?", | |
"What is the total amount of stars?", | |
], | |
) | |
self.assertEqual( | |
outputs, | |
[ | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
], | |
) | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table=None) | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table="") | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table={}) | |
with self.assertRaises(ValueError): | |
table_querier( | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
} | |
) | |
with self.assertRaises(ValueError): | |
table_querier( | |
query="", | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
) | |
with self.assertRaises(ValueError): | |
table_querier( | |
query=None, | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
) | |
def test_slow_tokenizer_sqa_tf(self): | |
model_id = "lysandre/tiny-tapas-random-sqa" | |
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True) | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) | |
inputs = { | |
"table": { | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
"query": ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"], | |
} | |
sequential_outputs = table_querier(**inputs, sequential=True) | |
batch_outputs = table_querier(**inputs, sequential=False) | |
self.assertEqual(len(sequential_outputs), 3) | |
self.assertEqual(len(batch_outputs), 3) | |
self.assertEqual(sequential_outputs[0], batch_outputs[0]) | |
self.assertNotEqual(sequential_outputs[1], batch_outputs[1]) | |
# self.assertNotEqual(sequential_outputs[2], batch_outputs[2]) | |
table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer) | |
outputs = table_querier( | |
table={ | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
query="how many movies has george clooney played in?", | |
) | |
self.assertEqual( | |
outputs, | |
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, | |
) | |
outputs = table_querier( | |
table={ | |
"actors": ["brad pitt", "leonardo di caprio", "george clooney"], | |
"age": ["56", "45", "59"], | |
"number of movies": ["87", "53", "69"], | |
"date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
}, | |
query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"], | |
) | |
self.assertEqual( | |
outputs, | |
[ | |
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, | |
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, | |
{"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, | |
], | |
) | |
outputs = table_querier( | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
query=[ | |
"What repository has the largest number of stars?", | |
"Given that the numbers of stars defines if a repository is active, what repository is the most" | |
" active?", | |
"What is the number of repositories?", | |
"What is the average number of stars?", | |
"What is the total amount of stars?", | |
], | |
) | |
self.assertEqual( | |
outputs, | |
[ | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
{"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, | |
], | |
) | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table=None) | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table="") | |
with self.assertRaises(ValueError): | |
table_querier(query="What does it do with empty context ?", table={}) | |
with self.assertRaises(ValueError): | |
table_querier( | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
} | |
) | |
with self.assertRaises(ValueError): | |
table_querier( | |
query="", | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
) | |
with self.assertRaises(ValueError): | |
table_querier( | |
query=None, | |
table={ | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
}, | |
) | |
def test_integration_wtq_pt(self): | |
table_querier = pipeline("table-question-answering") | |
data = { | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
} | |
queries = [ | |
"What repository has the largest number of stars?", | |
"Given that the numbers of stars defines if a repository is active, what repository is the most active?", | |
"What is the number of repositories?", | |
"What is the average number of stars?", | |
"What is the total amount of stars?", | |
] | |
results = table_querier(data, queries) | |
expected_results = [ | |
{"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"}, | |
{"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"}, | |
{ | |
"answer": "COUNT > Transformers, Datasets, Tokenizers", | |
"coordinates": [(0, 0), (1, 0), (2, 0)], | |
"cells": ["Transformers", "Datasets", "Tokenizers"], | |
"aggregator": "COUNT", | |
}, | |
{ | |
"answer": "AVERAGE > 36542, 4512, 3934", | |
"coordinates": [(0, 1), (1, 1), (2, 1)], | |
"cells": ["36542", "4512", "3934"], | |
"aggregator": "AVERAGE", | |
}, | |
{ | |
"answer": "SUM > 36542, 4512, 3934", | |
"coordinates": [(0, 1), (1, 1), (2, 1)], | |
"cells": ["36542", "4512", "3934"], | |
"aggregator": "SUM", | |
}, | |
] | |
self.assertListEqual(results, expected_results) | |
def test_integration_wtq_tf(self): | |
model_id = "google/tapas-base-finetuned-wtq" | |
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id) | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
table_querier = pipeline("table-question-answering", model=model, tokenizer=tokenizer) | |
data = { | |
"Repository": ["Transformers", "Datasets", "Tokenizers"], | |
"Stars": ["36542", "4512", "3934"], | |
"Contributors": ["651", "77", "34"], | |
"Programming language": ["Python", "Python", "Rust, Python and NodeJS"], | |
} | |
queries = [ | |
"What repository has the largest number of stars?", | |
"Given that the numbers of stars defines if a repository is active, what repository is the most active?", | |
"What is the number of repositories?", | |
"What is the average number of stars?", | |
"What is the total amount of stars?", | |
] | |
results = table_querier(data, queries) | |
expected_results = [ | |
{"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"}, | |
{"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"}, | |
{ | |
"answer": "COUNT > Transformers, Datasets, Tokenizers", | |
"coordinates": [(0, 0), (1, 0), (2, 0)], | |
"cells": ["Transformers", "Datasets", "Tokenizers"], | |
"aggregator": "COUNT", | |
}, | |
{ | |
"answer": "AVERAGE > 36542, 4512, 3934", | |
"coordinates": [(0, 1), (1, 1), (2, 1)], | |
"cells": ["36542", "4512", "3934"], | |
"aggregator": "AVERAGE", | |
}, | |
{ | |
"answer": "SUM > 36542, 4512, 3934", | |
"coordinates": [(0, 1), (1, 1), (2, 1)], | |
"cells": ["36542", "4512", "3934"], | |
"aggregator": "SUM", | |
}, | |
] | |
self.assertListEqual(results, expected_results) | |
def test_integration_sqa_pt(self): | |
table_querier = pipeline( | |
"table-question-answering", | |
model="google/tapas-base-finetuned-sqa", | |
tokenizer="google/tapas-base-finetuned-sqa", | |
) | |
data = { | |
"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], | |
"Age": ["56", "45", "59"], | |
"Number of movies": ["87", "53", "69"], | |
"Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
} | |
queries = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"] | |
results = table_querier(data, queries, sequential=True) | |
expected_results = [ | |
{"answer": "69", "coordinates": [(2, 2)], "cells": ["69"]}, | |
{"answer": "59", "coordinates": [(2, 1)], "cells": ["59"]}, | |
{"answer": "28 november 1967", "coordinates": [(2, 3)], "cells": ["28 november 1967"]}, | |
] | |
self.assertListEqual(results, expected_results) | |
def test_integration_sqa_tf(self): | |
model_id = "google/tapas-base-finetuned-sqa" | |
model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id) | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
table_querier = pipeline( | |
"table-question-answering", | |
model=model, | |
tokenizer=tokenizer, | |
) | |
data = { | |
"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], | |
"Age": ["56", "45", "59"], | |
"Number of movies": ["87", "53", "69"], | |
"Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
} | |
queries = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"] | |
results = table_querier(data, queries, sequential=True) | |
expected_results = [ | |
{"answer": "69", "coordinates": [(2, 2)], "cells": ["69"]}, | |
{"answer": "59", "coordinates": [(2, 1)], "cells": ["59"]}, | |
{"answer": "28 november 1967", "coordinates": [(2, 3)], "cells": ["28 november 1967"]}, | |
] | |
self.assertListEqual(results, expected_results) | |
def test_large_model_pt_tapex(self): | |
model_id = "microsoft/tapex-large-finetuned-wtq" | |
table_querier = pipeline( | |
"table-question-answering", | |
model=model_id, | |
) | |
data = { | |
"Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"], | |
"Age": ["56", "45", "59"], | |
"Number of movies": ["87", "53", "69"], | |
"Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], | |
} | |
queries = [ | |
"How many movies has George Clooney played in?", | |
"How old is Mr Clooney ?", | |
"What's the date of birth of Leonardo ?", | |
] | |
results = table_querier(data, queries, sequential=True) | |
expected_results = [ | |
{"answer": " 69"}, | |
{"answer": " 59"}, | |
{"answer": " 10 june 1996"}, | |
] | |
self.assertListEqual(results, expected_results) | |