File size: 3,135 Bytes
1a7a06e
 
 
 
 
 
 
afa237b
1a7a06e
 
afa237b
1a7a06e
 
d092e25
1a7a06e
 
afa237b
 
 
1a7a06e
 
 
4719e8c
afa237b
 
 
 
1a7a06e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d092e25
1a7a06e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d092e25
 
 
 
 
 
 
 
 
 
 
 
 
 
1a7a06e
febdf4e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import json
import os
import sys
import urllib
import urllib.parse
import urllib.request

import numpy as np
import streamlit as st
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config

def generate(tokenizer, model, text, features):
    """Sample comments from the model for a video title and render them in Streamlit.

    Args:
        tokenizer: tokenizer whose vocabulary includes the custom control tokens
            (<|startoftext|>, <|titlestart|>, <|authornamebegin|>, ...).
        model: a GPT2LMHeadModel ready for generation.
        text: the video title used as the generation prompt.
        features: dict with keys 'max_length', 'top_p', 't' (temperature * 100,
            divided by 100 here) and 'num' (number of sequences to sample).

    Side effects:
        Writes one markdown line per sampled comment via st.markdown.
    """
    # Prompt ends right after the author-name opening tag so the model
    # first generates an author, then the comment body.
    prompt = "<|startoftext|> <|titlestart|>{}<|titleend|><|authornamebegin|>".format(text)
    generated = tokenizer(prompt, return_tensors="pt").input_ids
    sample_outputs = model.generate(
        generated, do_sample=True, top_k=50,
        max_length=features['max_length'], top_p=features['top_p'],
        # Slider stores temperature as an int scaled by 100 (see main()).
        temperature=features['t'] / 100.0, num_return_sequences=features['num'],
    )
    for sample_output in sample_outputs:
        # Keep special tokens so we can split on the author-name delimiters.
        decoded = tokenizer.decode(sample_output, skip_special_tokens=False)
        # BUG FIX: the unpacked name was misspelled 'autor' while the next
        # line read 'author', raising NameError on every call.
        author, text = decoded.split('<|authornamebegin|>')[1].split('<|authornameend|>')
        st.markdown('**' + author.strip() + '**: ' + text.replace('<|endoftext|>', '').replace('<|pad|>', '').strip())


def load_model():
    """Load the fine-tuned tokenizer and GPT-2 model from local checkpoint files.

    Returns:
        (tokenizer, model): the pickled tokenizer from ./tokenizer.pt and a
        GPT2LMHeadModel built from ./config.json with weights loaded (on CPU)
        from ./pytorch_model.bin.
    """
    tokenizer = torch.load('./tokenizer.pt')
    gpt2_config = GPT2Config.from_json_file('./config.json')
    gpt2_model = GPT2LMHeadModel(gpt2_config)
    # Force CPU placement so the app runs on hosts without a GPU.
    weights = torch.load('./pytorch_model.bin', map_location=torch.device('cpu'))
    gpt2_model.load_state_dict(weights)
    return tokenizer, gpt2_model


def main():
    """Streamlit entry point: build the UI, fetch a video title, generate comments.

    Flow: load model -> sidebar sliders -> URL form -> oEmbed title lookup ->
    embed the video -> sample comments for the title.
    """
    tokenizer, model = load_model()
    st.title("YouTube comments generating project")
    st.header('YouTube comments generator')

    st.sidebar.title("Features")

    # Slider values feed generate(); 't' is temperature scaled by 100 and
    # 'top_p' is a percentage (the model call divides 't' by 100).
    features = {
        "num": st.sidebar.slider("Количество комментариев", 0, 20, 1, 1),
        "t": st.sidebar.slider("Температура", 0, 300, 180, 1),
        "top_p": st.sidebar.slider("Top-p", 0, 100, 95, 5),
        "max_length": st.sidebar.slider("Максимальная длина комментария", 0, 300, 100, 5),
    }

    st.sidebar.title("Note")
    st.sidebar.write(
        """
	Изменяя значения, можно получить различные выводы модели
        """
    )
    st.sidebar.write(
        """
	Значение температуры делится на 100
        """
    )
    st.sidebar.caption(f"Streamlit version `{st.__version__}`")

    with st.form(key='my_form'):
        url = st.text_input('Введите url видео на YouTube')
        # The submit return value is intentionally unused: Streamlit reruns
        # the script on submit, and the non-empty `url` gates the work below.
        st.form_submit_button('Готово!')

    if url:
        # Resolve the video title via YouTube's public oEmbed endpoint
        # (no API key required).
        params = {"format": "json", "url": url}
        query_string = urllib.parse.urlencode(params)
        oembed_url = "https://www.youtube.com/oembed" + "?" + query_string

        with urllib.request.urlopen(oembed_url) as response:
            data = json.loads(response.read().decode())
            st.write('Video Title: ' + data['title'])
        st.video(url)
        generate(tokenizer, model, data['title'], features)

if __name__ == "__main__":
    main()