Spaces:

BigSalmon
/

GPT2_Most_Probable

Runtime error

File size: 13,425 Bytes

fc757ce
5e48936
fc757ce
5e48936
 
 
 
7671981
5e48936
 
39e9427
5e48936
 
 
d6cc25a
 
a08c530
4b92f55
 
 
 
 
39e9427
 
 
48d2c46
 
 
2158ccf
 
 
5307bf5
 
 
3619d45
 
 
67c005c
a08c530
 
599c53c
67c005c
 
4c3bc8a
 
 
f474d4a
 
 
880a9a9
 
 
0e34b1b
 
 
df1a01f
 
 
51cb87a
 
 
599c53c
 
7671981
599c53c
6c030df
 
 
df7f0f8
 
6c030df
d6cc25a
 
 
5e48936
f370c95
 
cf031ee
f370c95
 
cf031ee
f370c95
 
cf031ee
f370c95
 
 
 
 
 
686dd20
bb89c93
1fef982
30032dc
1f2cac2
 
 
 
 
 
 
 
 
aeb78d5
1f2cac2
 
 
 
46d30f2
1f2cac2
 
46d30f2
1f2cac2
 
 
 
 
 
 
 
 
1796fcd
1f2cac2
1796fcd
aeb78d5
1f2cac2
5f2464c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8a914a
 
 
 
 
 
 
 
 
1fef982
a8a914a
1fef982
a8a914a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f2cac2
5e48936
da55c1c
5e48936
1f2cac2
aeb78d5
a8a914a
5e48936
 
 
e0e7abc
 
 
 
 
 
 
30032dc
e0e7abc
 
 
 
1f2cac2
7eaf946
 
 
5f2464c
1796fcd
7eaf946
1796fcd
7eaf946
 
 
a8a914a

import streamlit as st
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
from transformers.activations import get_activation
from transformers import AutoTokenizer, AutoModelForCausalLM


# Page heading; the text directs users to a model card for the prompt outlines.
st.title('GPT2: To see all prompt outlines: https://huggingface.co/BigSalmon/InformalToFormalLincoln64Paraphrase')

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@st.cache(allow_output_mutation=True)
def get_model():
    """Load the tokenizer and causal language model used by the app.

    The function is wrapped in ``st.cache`` so Streamlit can reuse the loaded
    objects instead of loading them again on every script run.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    # Single source of truth for the checkpoint so that the tokenizer and the
    # model can never be loaded from two different repositories by accident.
    checkpoint = "BigSalmon/InformalToFormalLincoln72Paraphrase"

    # Checkpoints previously tried here (all under the "BigSalmon/" namespace):
    #   InformalToFormalLincoln64Paraphrase, InformalToFormalLincoln60Paraphrase,
    #   GPTNeo1.3BInformalToFormal, InformalToFormalLincoln55,
    #   InformalToFormalLincoln51, InformalToFormalLincoln45 (tokenizer) with
    #   InformalToFormalLincoln49 (model), InformalToFormalLincoln43,
    #   InformalToFormalLincoln41, InformalToFormalLincoln38,
    #   InformalToFormalLincoln37, InformalToFormalLincoln36,
    #   MediumInformalToFormalLincoln, InformalToFormalLincoln35,
    #   InformalToFormalLincoln31, InformalToFormalLincoln21,
    #   PointsOneSent, PointsToSentence.
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForCausalLM.from_pretrained(checkpoint)
    return loaded_model, loaded_tokenizer
    
# Obtain the model and tokenizer through the cached loader defined above.
model, tokenizer = get_model()

# Default few-shot prompt used as the initial value of the text area: five
# "informal english" / "Translated into the Style of Abraham Lincoln" pairs
# followed by an open "informal english: " slot. The text is sent to the model
# as-is, so it must not be reformatted.
g = """informal english: garage band has made people who know nothing about music good at creating music.
Translated into the Style of Abraham Lincoln: garage band ( offers the uninitiated in music the ability to produce professional-quality compositions / catapults those for whom music is an uncharted art the ability the realize masterpieces / stimulates music novice's competency to yield sublime arrangements / begets individuals of rudimentary musical talent the proficiency to fashion elaborate suites ).

informal english: chrome extensions can make doing regular tasks much easier to get done.
Translated into the Style of Abraham Lincoln: chrome extensions ( yield the boon of time-saving convenience / ( expedite the ability to / unlock the means to more readily ) accomplish everyday tasks / turbocharges the velocity with which one can conduct their obligations ).

informal english: broadband is finally expanding to rural areas, a great development that will thrust them into modern life.
Translated into the Style of Abraham Lincoln: broadband is ( ( finally / at last / after years of delay ) arriving in remote locations / springing to life in far-flung outposts / inching into even the most backwater corners of the nation ) that will leap-frog them into the twenty-first century.

informal english: google translate has made talking to people who do not share your language easier.
Translated into the Style of Abraham Lincoln: google translate ( imparts communicability to individuals whose native tongue differs / mitigates the trials of communication across linguistic barriers / hastens the bridging of semantic boundaries / mollifies the complexity of multilingual communication / avails itself to the internationalization of discussion / flexes its muscles to abet intercultural conversation / calms the tides of linguistic divergence ).

informal english: corn fields are all across illinois, visible once you leave chicago.
Translated into the Style of Abraham Lincoln: corn fields ( permeate illinois / span the state of illinois / ( occupy / persist in ) all corners of illinois / line the horizon of illinois / envelop the landscape of illinois ), manifesting themselves visibly as one ventures beyond chicago.

informal english: """

# Sidebar controls. number_of_outputs (5-20) is the top-k size used by
# BestProbs5; log_nums (50-600) is the top-k size used by the "Submit" button.
number_of_outputs = st.sidebar.slider("Number of Outputs", 5, 20)
log_nums = st.sidebar.slider("How Many Log Outputs?", 50, 600)

def BestProbs(prompt):
    """Render the 10 most likely next tokens and the followers of each one.

    For every one of the 10 highest-scoring next tokens after ``prompt`` the
    token is written to the page and ``BestProbs2`` is called on
    ``prompt + token`` to list that candidate's own most likely continuations.

    Args:
        prompt: Text to continue. Leading/trailing whitespace is stripped
            before tokenization.

    Returns:
        The last string written for a first-level token (``"$<token> $\\n"``).
    """
    prompt = prompt.strip()
    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        # Index 0 of the tuple output is the logits; keep the last position.
        next_token_logits = model(input_ids, return_dict=False)[0][0, -1]
    _, best_indices = next_token_logits.topk(10)
    best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]

    rendered = ""
    for word in best_words:
        print("_______")
        rendered = f"${word} $\n"
        st.write(rendered)
        BestProbs2(prompt + word)
    return rendered

def BestProbs2(prompt):
    """Print and render the 20 most likely next tokens after ``prompt``.

    Args:
        prompt: Text to continue. Leading/trailing whitespace is stripped
            before tokenization.

    Returns:
        None. Each token is emitted with ``print`` and ``st.write``.
    """
    prompt = prompt.strip()
    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        # Index 0 of the tuple output is the logits; keep the last position.
        next_token_logits = model(input_ids, return_dict=False)[0][0, -1]
    _, best_indices = next_token_logits.topk(20)

    for idx in best_indices:
        word = tokenizer.decode([idx.item()])
        print(word)
        st.write(word)
    
def _top_next_words(text, k):
    """Return the ``k`` highest-scoring next tokens after ``text`` as strings."""
    input_ids = torch.tensor([tokenizer.encode(text)]).to(model.device)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        # Index 0 of the tuple output is the logits; keep the last position.
        next_token_logits = model(input_ids, return_dict=False)[0][0, -1]
    _, best_indices = next_token_logits.topk(k)
    return [tokenizer.decode([idx.item()]) for idx in best_indices]


def LogProbs(prompt):
    """Build and render a table of two-step continuations of ``prompt``.

    The 10 most likely next tokens become the column headers; each column
    holds the 20 most likely tokens that follow ``prompt`` + that header.

    Every column is computed from the original prompt plus exactly one
    candidate token. (The previous implementation overwrote ``prompt`` inside
    the loop, so later columns were conditioned on all earlier candidates
    glued together.)

    Args:
        prompt: Text to continue. Leading/trailing whitespace is stripped
            before tokenization.

    Returns:
        A ``pandas.DataFrame`` with one column per first-step token and 20
        rows. The frame is also printed and written to the Streamlit page.
    """
    prompt = prompt.strip()

    table = {}
    for first_word in _top_next_words(prompt, 10):
        print("_______")
        # If two token ids decode to the same text the later one wins, which
        # matches how the original dict literal behaved.
        table[first_word] = _top_next_words((prompt + first_word).strip(), 20)

    df = pd.DataFrame(data=table)
    print(df)
    st.write(df)
    return df
  
def BestProbs5(prompt):
    """Expand the top next tokens of ``prompt`` into short sampled continuations.

    Takes the ``number_of_outputs`` highest-scoring next tokens (the value
    comes from the module-level sidebar slider). For each one it writes
    ``prompt + token`` to the page, followed by the list returned by
    ``run_generate`` for that text.

    Args:
        prompt: Text to continue. Leading/trailing whitespace is stripped
            before tokenization.

    Returns:
        None. All results are rendered with ``st.write``.
    """
    prompt = prompt.strip()
    input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        # Index 0 of the tuple output is the logits; keep the last position.
        next_token_logits = model(input_ids, return_dict=False)[0][0, -1]
    _, best_indices = next_token_logits.topk(number_of_outputs)

    for idx in best_indices:
        print("\n")
        # Named `candidate` rather than `g` to avoid shadowing the module-level
        # default prompt.
        candidate = prompt + tokenizer.decode([idx.item()])
        st.write(candidate)
        # "hey" is passed as the bad-words argument, as in the original code.
        st.write(run_generate(candidate, "hey"))
    
def run_generate(text, bad_words, num_return_sequences=3, new_tokens=5):
    """Sample short continuations of ``text`` while banning some words.

    Args:
        text: Prompt to continue.
        bad_words: Whitespace-separated words that must not be generated.
            Each word is tokenized with a leading space before being banned.
        num_return_sequences: How many continuations to sample (default 3,
            the value that used to be hard-coded).
        new_tokens: How many tokens to add to the prompt (default 5, the
            value that used to be hard-coded). ``min_length`` and
            ``max_length`` are both set to prompt length + ``new_tokens``.

    Returns:
        A list of ``num_return_sequences`` strings, each holding only the
        newly generated text. The list is also printed.
    """
    input_ids = tokenizer.encode(text, return_tensors='pt').to(model.device)
    # Reuse the encoded prompt for its length instead of encoding twice.
    prompt_len = input_ids.shape[1]

    # NOTE(review): token ids 7829 and 40940 are always banned; their meaning
    # is not documented in this file - confirm against the tokenizer vocab.
    bad_word_ids = [[7829], [40940]]
    for bad_word in bad_words.split():
        bad_word_ids.append(tokenizer(" " + bad_word).input_ids)

    target_length = prompt_len + new_tokens
    sample_outputs = model.generate(
        input_ids,
        do_sample=True,
        max_length=target_length,
        min_length=target_length,
        top_k=50,
        temperature=1.0,
        num_return_sequences=num_return_sequences,
        bad_words_ids=bad_word_ids,
    )

    # Each returned sequence starts with the prompt tokens. Slicing them off
    # is more reliable than str.replace on the decoded text, which fails when
    # decoding does not reproduce the prompt exactly and would also delete
    # any repeat of the prompt inside the continuation.
    continuations = [
        tokenizer.decode(sequence[prompt_len:]) for sequence in sample_outputs
    ]
    print(continuations)
    return continuations

# Main form: one prompt box and four actions. Results are rendered inside the
# form container, below the buttons.
with st.form(key='my_form'):
    prompt = st.text_area(label='Enter sentence', value=g, height=500)
    submit_button = st.form_submit_button(label='Submit')
    submit_button2 = st.form_submit_button(label='Fast Forward')
    submit_button3 = st.form_submit_button(label='Fast Forward 2.0')
    submit_button4 = st.form_submit_button(label='Get Top')

    # "Submit": list the log_nums most likely next tokens for the prompt.
    if submit_button:
        with torch.no_grad():
            # Send the input to wherever the model actually lives. The model
            # is never moved to `device`, so using `device` here would raise a
            # device-mismatch error on a CUDA host.
            input_ids = torch.tensor([tokenizer.encode(prompt)]).to(model.device)
            # Index 0 of the tuple output is the logits; keep the last position.
            next_token_logits = model(input_ids, return_dict=False)[0][0, -1]
        _, best_indices = next_token_logits.topk(log_nums)
        best_words = [tokenizer.decode([idx.item()]) for idx in best_indices]
        st.write(best_words)

    # "Fast Forward": table of two-step continuations.
    if submit_button2:
        print("----")
        st.write("___")
        with torch.no_grad():
            # LogProbs writes the DataFrame itself; writing its return value
            # again would show the same table twice.
            LogProbs(prompt)
        st.write("___")

    # "Fast Forward 2.0": nested list of continuations.
    if submit_button3:
        print("----")
        st.write("___")
        with torch.no_grad():
            # Call the function; the previous code passed the function object
            # to st.write, which rendered its repr and computed nothing.
            BestProbs(prompt)

    # "Get Top": top tokens, each expanded with sampled continuations.
    if submit_button4:
        with torch.no_grad():
            BestProbs5(prompt)