Spaces:

DeepLearning101
/

IE101TW

Running

File size: 7,186 Bytes

45311fe

# -*- coding: utf-8 -*-
# @Time    : 2023/3/23 1:02 p.m.
# @Author  : Jianing Wang
# @File    : gpt_response.py

import os
import sys
import torch
import openai
import time

"""
Call for GPT-style LLM.
The output format is the same as OpenAI (e.g., GPT-3.5 text-davinci-003)
"""
class GPTResponse:

    def __init__(self, model_type: str, data_path: str) -> None:
        assert model_type in ["gpt2", "gpt3"]
        self.model_type = model_type
        if self.model_type == "gpt3":

            with open(os.path.join(data_path, 'openai_key.txt'), 'r') as f:
                key = f.readline().strip()
                openai.api_key = key

    def call_for_gpt3_response(self, prompt, l, model_name, temp=0, num_log_probs=None, echo=False, n=None):
        """
        call GPT-3 API until result is provided and then return it
        """
        response = None
        received = False
        while not received:
            try:
                response = openai.Completion.create(engine=model_name, prompt=prompt, max_tokens=l, temperature=temp,
                                                    logprobs=num_log_probs, echo=echo, stop='\n', n=n)
                received = True
            except:
                error = sys.exc_info()[0]
                if error == openai.error.InvalidRequestError: # something is wrong: e.g. prompt too long
                    print(f"InvalidRequestError\nPrompt passed in:\n\n{prompt}\n\n")
                    assert False

                print("API error:", error)
                time.sleep(1)
        return response

    def call_for_gpt2_response(self, gpt2_tokenizer, logits, total_sequences, l=10, num_log_probs=None, echo=False, n=None):
        """
        Obtain the prediction logits from gpt2 in local, and convert it to the value that can match the response from OpenAI
        """
        if not echo:
            # get the top tokens and probs for the generated l tokens
            probs = torch.softmax(logits[:,-l-1:], dim=2).cpu()
        else:
            # get the top tokens and probs for the context and the generated l tokens
            probs = torch.softmax(logits, dim=2).cpu()
        # print("probs=", probs)
        top_probs, top_tokens = torch.topk(probs, k=num_log_probs)
        logprobs = torch.log(probs)
        top_log_probs = torch.log(top_probs)

        # create the return value to resemble OpenAI
        return_json = {}
        choices = []
        # print("="*50)
        for batch_id in range(len(logits)):
            curr_json = {}
            # text is just the optional context and next l tokens
            if not echo:
                curr_json['text'] = gpt2_tokenizer.decode(total_sequences[batch_id][-l:], skip_special_tokens=True)
            else:
                curr_json['text'] = gpt2_tokenizer.decode(total_sequences[batch_id], skip_special_tokens=True)

            # fill the return json with the top tokens and probs to match the OpenAI return value.
            if num_log_probs is not None:
                curr_json['logprobs'] = {}
                curr_json['logprobs']['top_logprobs'] = []
                curr_json['logprobs']['token_logprobs'] = []
                curr_json['logprobs']['tokens'] = []
                if not echo:
                    # cutoff the -1 here because the probs are shifted one over for LMs
                    for current_element_top_log_probs, current_element_top_tokens in zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1]):
                        # tokens is a list of the top token at each position
                        curr_json['logprobs']['tokens'].append(gpt2_tokenizer.decode([current_element_top_tokens[0]]))
                        # token_logprobs is a list of the logprob of the top token at each position
                        curr_json['logprobs']['token_logprobs'].append(current_element_top_log_probs[0].item())
                        # top_logprobs is a list of dicts for the top K tokens. with each entry being {'token_name': log_prob}
                        temp = {}
                        for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
                            temp[gpt2_tokenizer.decode(token.item())] = log_prob.item()
                        curr_json['logprobs']['top_logprobs'].append(temp)
                else:
                    # same as not above but small tweaks
                    # we add null to the front because for the GPT models, they have null probability for the first token
                    # (for some reason they don't have an beginning of sentence token)
                    curr_json['logprobs']['top_logprobs'].append('null')
                    # cutoff the -1 here because the probs are shifted one over for LMs
                    for index, (current_element_top_log_probs, current_element_top_tokens) in enumerate(zip(top_log_probs[batch_id][:-1], top_tokens[batch_id][:-1])):
                        # skip padding tokens
                        if total_sequences[batch_id][index].item() == 50256:
                            continue
                        temp = {}
                        for log_prob, token in zip(current_element_top_log_probs, current_element_top_tokens):
                            temp[gpt2_tokenizer.decode(token.item())] = log_prob.item()
                        curr_json['logprobs']['top_logprobs'].append(temp)
                    for index in range(len(probs[batch_id])):
                        curr_json['logprobs']['tokens'].append(gpt2_tokenizer.decode([total_sequences[batch_id][index]]))
                    curr_json['logprobs']['token_logprobs'].append('null')
                    for index, log_probs_token_position_j in enumerate(logprobs[batch_id][:-1]):
                        # probs are left shifted for LMs
                        curr_json['logprobs']['token_logprobs'].append(log_probs_token_position_j[total_sequences[batch_id][index+1]])

            choices.append(curr_json)
            # print("curr_json=", curr_json)
            '''
            e.g.,
            num_tokens_to_predict=1
            curr_json= {
                'text': ' I', # 当前生成的top词
                'logprobs': {'top_logprobs': [{' I': -3.4267239570617676, '\n': -3.5073862075805664, ...], # top100词及其socre
                'token_logprobs': [-3.4267239570617676], # 当前top词的score
                'tokens': [' I']}
            }
            num_tokens_to_predict=2
            curr_json= {
                'text': '\nThe', # 如果指定生成两个词，则为两个词
                'logprobs': {'top_logprobs': [ # 两个位置对应的预测的score
                    {'\n': -3.186706304550171, '\xa0': -3.222092390060425, ' We': -6.781067848205566, ...},
                    {'The': -2.5251243114471436, '"': -2.857935667037964, ...],
                'token_logprobs': [-3.186706304550171, -2.5251243114471436], # 生成的词的score
                'tokens': ['\n', 'The']}
            }
            '''
        return_json['choices'] = choices
        # print("="*50)
        # print("return_json=", return_json)
        return return_json