tskolm's picture
Update app.py
afa237b
raw
history blame
3.14 kB
import json
import os
import sys
import urllib
import urllib.error
import urllib.parse
import urllib.request

import numpy as np
import streamlit as st
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config
def generate(tokenizer, model, text, features):
    """Generate comments for a video title and render them with Streamlit.

    A prompt is built from ``text`` using the special title/author markers,
    sampled continuations are requested from ``model.generate``, and each
    decoded sample is written to the page as ``**author**: comment``.

    Args:
        tokenizer: Callable tokenizer; invoked with the prompt and
            ``return_tensors="pt"``, and used to decode the samples.
        model: Object exposing ``generate(input_ids, ...)``.
        text: Video title inserted into the prompt.
        features: Mapping with the keys ``"num"`` (number of samples),
            ``"t"`` (temperature multiplied by 100), ``"top_p"`` and
            ``"max_length"``.
    """
    num_comments = features['num']
    if num_comments <= 0:
        # Nothing was requested, so skip the model call entirely.
        return

    # The only caller in this file supplies top_p from a 0-100 slider, while
    # nucleus sampling takes a probability mass; values above 1 are therefore
    # interpreted as percentages. Values already in [0, 1] pass through.
    top_p = features['top_p']
    if top_p > 1:
        top_p = top_p / 100.0

    prompt = "<|startoftext|> <|titlestart|>{}<|titleend|><|authornamebegin|>".format(text)
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    sample_outputs = model.generate(
        input_ids,
        do_sample=True,
        top_k=50,
        max_length=features['max_length'],
        top_p=top_p,
        temperature=features['t'] / 100.0,
        num_return_sequences=num_comments,
    )

    for sample_output in sample_outputs:
        decoded = tokenizer.decode(sample_output, skip_special_tokens=False)
        # partition() never raises: if the end-of-author marker is missing
        # (e.g. the sample was cut off by max_length) the whole tail is
        # treated as the author name and the comment is left empty.
        _, _, tail = decoded.partition('<|authornamebegin|>')
        author, _, comment = tail.partition('<|authornameend|>')
        comment = comment.replace('<|endoftext|>', '').replace('<|pad|>', '').strip()
        st.markdown('**' + author.strip() + '**: ' + comment)
def load_model():
    """Load the tokenizer and the GPT-2 language model from the working directory.

    Reads ``./tokenizer.pt``, ``./config.json`` and ``./pytorch_model.bin``.

    Returns:
        tuple: ``(tokenizer, model)`` where ``model`` is a ``GPT2LMHeadModel``
        with the stored weights loaded onto the CPU and switched to
        evaluation mode.
    """
    # NOTE(review): torch.load unpickles arbitrary objects; these files must
    # come from a trusted source.
    tokenizer = torch.load('./tokenizer.pt')

    config = GPT2Config.from_json_file('./config.json')
    model = GPT2LMHeadModel(config)
    state_dict = torch.load('./pytorch_model.bin', map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)

    # A freshly constructed module is in training mode; switch to evaluation
    # mode so dropout is disabled while generating.
    model.eval()
    return tokenizer, model
def main():
    """Render the Streamlit page and generate comments for a YouTube URL.

    Loads the model, draws the sidebar controls, asks for a video URL, looks
    up the video title through YouTube's oEmbed endpoint and passes the title
    together with the slider values to ``generate``.
    """
    # NOTE(review): this runs on every call to main(); consider caching the
    # loaded model if the installed Streamlit version offers a suitable cache.
    tokenizer, model = load_model()

    st.title("YouTube comments generating project")
    st.header('YouTube comments generator')

    st.sidebar.title("Features")
    # Slider arguments are (label, min, max, default, step).
    # The minimums for the comment count and the temperature are 1 rather
    # than 0: zero comments is an empty request and a temperature of 0 is
    # not a valid sampling temperature.
    features = {
        "num": st.sidebar.slider("Количество комментариев", 1, 20, 1, 1),
        "t": st.sidebar.slider("Температура", 1, 300, 180, 1),
        "top_p": st.sidebar.slider("Top-p", 0, 100, 95, 5),
        "max_length": st.sidebar.slider("Максимальная длина комментария", 0, 300, 100, 5),
    }

    st.sidebar.title("Note")
    st.sidebar.write("\nИзменяя значения, можно получить различные выводы модели\n")
    st.sidebar.write("\nЗначение температуры делится на 100\n")
    st.sidebar.caption(f"Streamlit version `{st.__version__}`")

    with st.form(key='my_form'):
        url = st.text_input('Введите url видео на YouTube')
        st.form_submit_button('Готово!')

    if not url:
        return

    # Ask the oEmbed endpoint for the video metadata as JSON.
    query_string = urllib.parse.urlencode({"format": "json", "url": url})
    oembed_url = "https://www.youtube.com/oembed" + "?" + query_string
    try:
        with urllib.request.urlopen(oembed_url) as response:
            data = json.loads(response.read().decode())
        title = data['title']
    except (urllib.error.URLError, ValueError, KeyError):
        # Covers HTTP/network failures, undecodable or non-JSON bodies, and
        # responses without a title; report it instead of crashing the page.
        st.error('Не удалось получить информацию о видео. Проверьте ссылку.')
        return

    st.write('Video Title: ' + title)
    st.video(url)
    generate(tokenizer, model, title, features)
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()