# coding=utf-8
# Copyright 2018 DPR Authors, The Hugging Face Team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch DPR model for Open Domain Question Answering."""


from dataclasses import dataclass
from typing import Optional, Tuple, Union

import torch
from torch import Tensor, nn

from ...file_utils import (
    ModelOutput,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    replace_return_docstrings,
)
from ...modeling_outputs import BaseModelOutputWithPooling
from ...modeling_utils import PreTrainedModel
from ...utils import logging
from ..bert.modeling_bert import BertModel
from .configuration_dpr import DPRConfig


logger = logging.get_logger(__name__)

_CONFIG_FOR_DOC = "DPRConfig"
_CHECKPOINT_FOR_DOC = "facebook/dpr-ctx_encoder-single-nq-base"
DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "facebook/dpr-ctx_encoder-single-nq-base",
    "facebook/dpr-ctx_encoder-multiset-base",
]
DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "facebook/dpr-question_encoder-single-nq-base",
    "facebook/dpr-question_encoder-multiset-base",
]
DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "facebook/dpr-reader-single-nq-base",
    "facebook/dpr-reader-multiset-base",
]


##########
# Outputs
##########


@dataclass
class DPRContextEncoderOutput(ModelOutput):
    """
    Class for outputs of :class:`~transformers.DPRContextEncoder`.

    Args:
        pooler_output: (:obj:``torch.FloatTensor`` of shape ``(batch_size, embeddings_size)``):
            The DPR encoder outputs the `pooler_output` that corresponds to the context representation. Last layer
            hidden-state of the first token of the sequence (classification token) further processed by a Linear
            layer. This output is to be used to embed contexts for nearest neighbors queries with questions
            embeddings.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape :obj:`(batch_size, num_heads,
            sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    pooler_output: torch.FloatTensor
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None


@dataclass
class DPRQuestionEncoderOutput(ModelOutput):
    """
    Class for outputs of :class:`~transformers.DPRQuestionEncoder`.

    Args:
        pooler_output: (:obj:``torch.FloatTensor`` of shape ``(batch_size, embeddings_size)``):
            The DPR encoder outputs the `pooler_output` that corresponds to the question representation. Last layer
            hidden-state of the first token of the sequence (classification token) further processed by a Linear
            layer. This output is to be used to embed questions for nearest neighbors queries with context
            embeddings.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape :obj:`(batch_size, num_heads,
            sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    pooler_output: torch.FloatTensor
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None


@dataclass
class DPRReaderOutput(ModelOutput):
    """
    Class for outputs of :class:`~transformers.DPRReader`.

    Args:
        start_logits: (:obj:``torch.FloatTensor`` of shape ``(n_passages, sequence_length)``):
            Logits of the start index of the span for each passage.
        end_logits: (:obj:``torch.FloatTensor`` of shape ``(n_passages, sequence_length)``):
            Logits of the end index of the span for each passage.
        relevance_logits: (:obj:``torch.FloatTensor`` of shape ``(n_passages, )``):
            Outputs of the QA classifier of the DPRReader that corresponds to the scores of each passage to answer
            the question, compared to all the other passages.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape :obj:`(batch_size, num_heads,
            sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    start_logits: torch.FloatTensor
    end_logits: torch.FloatTensor = None
    relevance_logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None


class DPREncoder(PreTrainedModel):

    base_model_prefix = "bert_model"

    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.bert_model = BertModel(config)
        assert self.bert_model.config.hidden_size > 0, "Encoder hidden_size can't be zero"
        self.projection_dim = config.projection_dim
        if self.projection_dim > 0:
            self.encode_proj = nn.Linear(self.bert_model.config.hidden_size, config.projection_dim)
        self.init_weights()

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = False,
    ) -> Union[BaseModelOutputWithPooling, Tuple[Tensor, ...]]:
        outputs = self.bert_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output, pooled_output = outputs[:2]
        # DPR pools by taking the last hidden state of the [CLS] token, not BERT's own pooler output
        pooled_output = sequence_output[:, 0, :]
        if self.projection_dim > 0:
            pooled_output = self.encode_proj(pooled_output)

        if not return_dict:
            return (sequence_output, pooled_output) + outputs[2:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    @property
    def embeddings_size(self) -> int:
        if self.projection_dim > 0:
            return self.encode_proj.out_features
        return self.bert_model.config.hidden_size

    def init_weights(self):
        self.bert_model.init_weights()
        if self.projection_dim > 0:
            self.encode_proj.apply(self.bert_model._init_weights)


class DPRSpanPredictor(PreTrainedModel):

    base_model_prefix = "encoder"

    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.encoder = DPREncoder(config)
        self.qa_outputs = nn.Linear(self.encoder.embeddings_size, 2)
        self.qa_classifier = nn.Linear(self.encoder.embeddings_size, 1)
        self.init_weights()

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Tensor,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = False,
    ) -> Union[DPRReaderOutput, Tuple[Tensor, ...]]:
        # notations: N - number of questions in a batch, M - number of passages per question, L - sequence length
        n_passages, sequence_length = input_ids.size() if input_ids is not None else inputs_embeds.size()[:2]
        # feed encoder
        outputs = self.encoder(
            input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]

        # compute logits
        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1).contiguous()
        end_logits = end_logits.squeeze(-1).contiguous()
        relevance_logits = self.qa_classifier(sequence_output[:, 0, :])

        # resize
        start_logits = start_logits.view(n_passages, sequence_length)
        end_logits = end_logits.view(n_passages, sequence_length)
        relevance_logits = relevance_logits.view(n_passages)

        if not return_dict:
            return (start_logits, end_logits, relevance_logits) + outputs[2:]

        return DPRReaderOutput(
            start_logits=start_logits,
            end_logits=end_logits,
            relevance_logits=relevance_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def init_weights(self):
        self.encoder.init_weights()
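

# Hedged sketch (not part of the original module): given one row of the reader's
# start/end logits, a best answer span can be picked greedily by maximizing
# start_logits[i] + end_logits[j] over i <= j < i + max_len. The library's
# DPRReaderTokenizer provides decode_best_spans for the full version with
# passage-level relevance ranking; this helper (a hypothetical name) only
# illustrates the idea.
def _best_span_sketch(start_logits: Tensor, end_logits: Tensor, max_len: int = 30) -> Tuple[int, int]:
    scores = start_logits.unsqueeze(1) + end_logits.unsqueeze(0)  # (L, L) score for each (start, end) pair
    valid = torch.triu(torch.ones_like(scores, dtype=torch.bool))  # keep end >= start
    valid &= ~torch.triu(torch.ones_like(scores, dtype=torch.bool), diagonal=max_len)  # cap span length
    scores = scores.masked_fill(~valid, float("-inf"))
    flat = int(scores.argmax())
    return flat // scores.size(1), flat % scores.size(1)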


##################
# PreTrainedModel
##################


class DPRPretrainedContextEncoder(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = DPRConfig
    load_tf_weights = None
    base_model_prefix = "ctx_encoder"
    _keys_to_ignore_on_load_missing = [r"position_ids"]

    def init_weights(self):
        self.ctx_encoder.init_weights()


class DPRPretrainedQuestionEncoder(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = DPRConfig
    load_tf_weights = None
    base_model_prefix = "question_encoder"
    _keys_to_ignore_on_load_missing = [r"position_ids"]

    def init_weights(self):
        self.question_encoder.init_weights()


class DPRPretrainedReader(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config_class = DPRConfig
    load_tf_weights = None
    base_model_prefix = "span_predictor"
    _keys_to_ignore_on_load_missing = [r"position_ids"]

    def init_weights(self):
        self.span_predictor.encoder.init_weights()
        self.span_predictor.qa_classifier.apply(self.span_predictor.encoder.bert_model._init_weights)
        self.span_predictor.qa_outputs.apply(self.span_predictor.encoder.bert_model._init_weights)


###############
# Actual Models
###############

DPR_START_DOCSTRING = r"""

    This model inherits from :class:`~transformers.PreTrainedModel`. Check the superclass documentation for the
    generic methods the library implements for all its models (such as downloading or saving, resizing the input
    embeddings, pruning heads etc.)

    This model is also a PyTorch `torch.nn.Module <https://pytorch.org/docs/stable/nn.html#torch.nn.Module>`__
    subclass. Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to
    general usage and behavior.

    Parameters:
        config (:class:`~transformers.DPRConfig`): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the
            model weights.
"""
DPR_ENCODERS_INPUTS_DOCSTRING = r"""
    Args:
        input_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. To match pretraining, DPR input sequence should be
            formatted with [CLS] and [SEP] tokens as follows:

            (a) For sequence pairs (for a pair title+text for example):

            ::

                tokens:         [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
                token_type_ids:   0   0  0    0    0     0       0   0   1  1  1  1   1   1

            (b) For single sequences (for a question for example):

            ::

                tokens:         [CLS] the dog is hairy . [SEP]
                token_type_ids:   0   0   0   0  0     0   0

            DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
            rather than the left.

            Indices can be obtained using :class:`~transformers.DPRTokenizer`. See
            :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__`
            for details.

            `What are input IDs? <../glossary.html#input-ids>`__
        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        token_type_ids (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in
            ``[0, 1]``:

            - 0 corresponds to a `sentence A` token,
            - 1 corresponds to a `sentence B` token.

            `What are token type IDs? <../glossary.html#token-type-ids>`_
        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded
            representation. This is useful if you want more control over how to convert :obj:`input_ids` indices
            into associated vectors than the model's internal embedding lookup matrix.
        output_attentions (:obj:`bool`, `optional`):
            Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
            returned tensors for more detail.
        output_hidden_states (:obj:`bool`, `optional`):
            Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
            for more detail.
        return_dict (:obj:`bool`, `optional`):
            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
"""
DPR_READER_INPUTS_DOCSTRING = r"""
    Args:
        input_ids: (:obj:`Tuple[torch.LongTensor]` of shapes :obj:`(n_passages, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. It has to be a sequence triplet with 1) the
            question, 2) the passages titles and 3) the passages texts. To match pretraining, DPR :obj:`input_ids`
            sequence should be formatted with [CLS] and [SEP] with the format:

                ``[CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>``

            DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
            rather than the left.

            Indices can be obtained using :class:`~transformers.DPRReaderTokenizer`. See this class documentation
            for more details.

            `What are input IDs? <../glossary.html#input-ids>`__
        attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(n_passages, sequence_length)`, `optional`):
            Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            `What are attention masks? <../glossary.html#attention-mask>`__
        inputs_embeds (:obj:`torch.FloatTensor` of shape :obj:`(n_passages, sequence_length, hidden_size)`, `optional`):
            Optionally, instead of passing :obj:`input_ids` you can choose to directly pass an embedded
            representation. This is useful if you want more control over how to convert :obj:`input_ids` indices
            into associated vectors than the model's internal embedding lookup matrix.
        output_attentions (:obj:`bool`, `optional`):
            Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under
            returned tensors for more detail.
        output_hidden_states (:obj:`bool`, `optional`):
            Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors
            for more detail.
        return_dict (:obj:`bool`, `optional`):
            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
"""


@add_start_docstrings(
    "The bare DPRContextEncoder transformer outputting pooler outputs as context representations.",
    DPR_START_DOCSTRING,
)
class DPRContextEncoder(DPRPretrainedContextEncoder):
    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.config = config
        self.ctx_encoder = DPREncoder(config)
        self.init_weights()

    @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=DPRContextEncoderOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[Tensor] = None,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ) -> Union[DPRContextEncoderOutput, Tuple[Tensor, ...]]:
        r"""
        Return:

        Examples::

            >>> from transformers import DPRContextEncoder, DPRContextEncoderTokenizer
            >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
            >>> model = DPRContextEncoder.from_pretrained('facebook/dpr-ctx_encoder-single-nq-base')
            >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
            >>> embeddings = model(input_ids).pooler_output
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = (
                torch.ones(input_shape, device=device)
                if input_ids is None
                else (input_ids != self.config.pad_token_id)
            )
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        outputs = self.ctx_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if not return_dict:
            return outputs[1:]
        return DPRContextEncoderOutput(
            pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions
        )


@add_start_docstrings(
    "The bare DPRQuestionEncoder transformer outputting pooler outputs as question representations.",
    DPR_START_DOCSTRING,
)
class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.config = config
        self.question_encoder = DPREncoder(config)
        self.init_weights()

    @add_start_docstrings_to_model_forward(DPR_ENCODERS_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=DPRQuestionEncoderOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[Tensor] = None,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ) -> Union[DPRQuestionEncoderOutput, Tuple[Tensor, ...]]:
        r"""
        Return:

        Examples::

            >>> from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer
            >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
            >>> model = DPRQuestionEncoder.from_pretrained('facebook/dpr-question_encoder-single-nq-base')
            >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors='pt')["input_ids"]
            >>> embeddings = model(input_ids).pooler_output
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = (
                torch.ones(input_shape, device=device)
                if input_ids is None
                else (input_ids != self.config.pad_token_id)
            )
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        outputs = self.question_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if not return_dict:
            return outputs[1:]
        return DPRQuestionEncoderOutput(
            pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions
        )


@add_start_docstrings(
    "The bare DPRReader transformer outputting span predictions.",
    DPR_START_DOCSTRING,
)
class DPRReader(DPRPretrainedReader):
    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.config = config
        self.span_predictor = DPRSpanPredictor(config)
        self.init_weights()

    @add_start_docstrings_to_model_forward(DPR_READER_INPUTS_DOCSTRING)
    @replace_return_docstrings(output_type=DPRReaderOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[Tensor] = None,
        attention_mask: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: bool = None,
        output_hidden_states: bool = None,
        return_dict=None,
    ) -> Union[DPRReaderOutput, Tuple[Tensor, ...]]:
        r"""
        Return:

        Examples::

            >>> from transformers import DPRReader, DPRReaderTokenizer
            >>> tokenizer = DPRReaderTokenizer.from_pretrained('facebook/dpr-reader-single-nq-base')
            >>> model = DPRReader.from_pretrained('facebook/dpr-reader-single-nq-base')
            >>> encoded_inputs = tokenizer(
            ...         questions=["What is love ?"],
            ...         titles=["Haddaway"],
            ...         texts=["'What Is Love' is a song recorded by the artist Haddaway"],
            ...         return_tensors='pt'
            ...     )
            >>> outputs = model(**encoded_inputs)
            >>> start_logits = outputs.start_logits
            >>> end_logits = outputs.end_logits
            >>> relevance_logits = outputs.relevance_logits
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)

        return self.span_predictor(
            input_ids,
            attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
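

# Example (not part of the original module): a minimal end-to-end retrieval
# sketch, assuming the pretrained checkpoints listed at the top of this file
# are available. DPR ranks passages by the dot product between question and
# context embeddings, which is what the `pooler_output` fields documented
# above are meant for.
if __name__ == "__main__":
    from transformers import (
        DPRContextEncoder,
        DPRContextEncoderTokenizer,
        DPRQuestionEncoder,
        DPRQuestionEncoderTokenizer,
    )

    q_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
    q_encoder = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
    ctx_tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
    ctx_encoder = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")

    question = "What is love ?"
    passages = [
        "'What Is Love' is a song recorded by the artist Haddaway",
        "The dog is hairy .",
    ]

    with torch.no_grad():
        q_emb = q_encoder(**q_tokenizer(question, return_tensors="pt")).pooler_output  # (1, 768)
        ctx_emb = ctx_encoder(**ctx_tokenizer(passages, padding=True, return_tensors="pt")).pooler_output  # (2, 768)

    # higher dot product = more relevant passage for the question
    scores = torch.matmul(q_emb, ctx_emb.T).squeeze(0)  # (2,)
    print(scores)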