# Hugging Face Space: table question answering demo comparing
# Microsoft TAPEX and Google TAPAS on user-supplied tabular data.
import gradio as gr
from transformers import (
AutoModelForSeq2SeqLM,
AutoModelForTableQuestionAnswering,
AutoTokenizer,
pipeline,
TapexTokenizer,
BartForConditionalGeneration
)
import pandas as pd
import json
# ---------------------------------------------------------------------------
# Model setup (runs once at startup; downloads weights on first run).
# ---------------------------------------------------------------------------
# Microsoft TAPEX is BART-based and is driven directly through its tokenizer
# and generate() in process2 (it is not used via the pipeline API here).
tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")
# Google TAPAS: two fine-tuned variants, both exposed through the
# table-question-answering pipeline.
pipe_tapas = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq")
pipe_tapas2 = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wikisql-supervised")
def process2(query, csv_dataStr):
    """Answer a question about a table with three table-QA models.

    Args:
        query: Natural-language question about the table.
        csv_dataStr: JSON string mapping column names to lists of cell values,
            e.g. '{"Actors": ["Brad Pitt"], "Number of movies": ["87"]}'.
            NOTE(review): cell values should be strings — TAPAS expects a
            string-typed table; confirm upstream input always satisfies this.

    Returns:
        Tuple of (tapex_answer, tapas_wtq_answer, tapas_wikisql_answer),
        each a single string.
    """
    table = pd.DataFrame.from_dict(json.loads(csv_dataStr))
    # Microsoft TAPEX: encode table+question, generate, decode first answer.
    encoding = tokenizer(table=table, query=query, return_tensors="pt")
    outputs = model.generate(**encoding)
    result_tapex = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # Google TAPAS (WikiTableQuestions fine-tune): first selected cell.
    result_tapas = pipe_tapas(table=table, query=query)['cells'][0]
    # Google TAPAS (WikiSQL fine-tune): first selected cell.
    result_tapas2 = pipe_tapas2(table=table, query=query)['cells'][0]
    return result_tapex, result_tapas, result_tapas2
# ---------------------------------------------------------------------------
# Gradio UI wiring.
# ---------------------------------------------------------------------------
# Inputs: the question and the table as a JSON string (see process2).
query_text = gr.Text(label="")
input_data = gr.Text(label="")
# Outputs: one text box per model so the answers can be compared side by side.
answer_text_tapex = gr.Text(label="")
answer_text_tapas = gr.Text(label="")
answer_text_tapas2 = gr.Text(label="")
description = "This Space lets you ask questions on CSV documents with Microsoft [TAPEX-Large](https://huggingface.co/microsoft/tapex-large-finetuned-wtq) and Google [TAPAS-Large](https://huggingface.co/google/tapas-large-finetuned-wtq). \
Both have been fine-tuned on the [WikiTableQuestions](https://huggingface.co/datasets/wikitablequestions) dataset. \n\n\
A sample file with football statistics is available in the repository: \n\n\
* Which team has the most wins? Answer: Manchester City FC\n\
* Which team has the most wins: Chelsea, Liverpool or Everton? Answer: Liverpool\n\
* Which teams have scored less than 40 goals? Answer: Cardiff City FC, Fulham FC, Brighton & Hove Albion FC, Huddersfield Town FC\n\
* What is the average number of wins? Answer: 16 (rounded)\n\n\
You can also upload your own CSV file. Please note that maximum sequence length for both models is 1024 tokens, \
so you may need to limit the number of rows in your CSV file. Chunking is not implemented yet."
iface = gr.Interface(
    theme="huggingface",
    description=description,
    layout="vertical",
    fn=process2,
    inputs=[query_text, input_data],
    outputs=[answer_text_tapex, answer_text_tapas, answer_text_tapas2],
    examples=[
    ],
    allow_flagging="never",
)
# BUG FIX: the original line ended with a stray " |" (scrape residue),
# which is a SyntaxError and made the whole Space fail at startup.
iface.launch()