File size: 2,530 Bytes
1a7a06e
 
 
 
 
 
 
 
 
 
 
 
febdf4e
1a7a06e
 
 
 
 
 
 
4719e8c
 
1a7a06e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
febdf4e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
import os
import streamlit as st
import sys
import urllib
import json
import torch

def generate(tokenizer, model, text, features):
    """Sample comments for a video title and write each one to the page.

    Args:
        tokenizer: Callable invoked with the prompt and ``return_tensors="pt"``;
            its result must expose ``input_ids``. Also needs a ``decode``
            method accepting ``skip_special_tokens``.
        model: Object whose ``generate`` method accepts the sampling keyword
            arguments used below and returns an iterable of token sequences.
        text: Video title, inserted between the ``<|titlestart|>`` and
            ``<|titleend|>`` markers of the prompt.
        features: Mapping with the keys ``"num"`` (number of comments),
            ``"t"`` (temperature multiplied by 100) and ``"top_p"`` (either a
            probability in ``[0, 1]`` or a percentage in ``(1, 100]``).

    Returns:
        None. Every generated comment is emitted through ``st.write``.
    """
    num_comments = features['num']
    if num_comments < 1:
        # Nothing was requested, so skip the model call entirely.
        return

    # The temperature slider starts at 0, which would give a temperature of
    # 0.0; sampling needs a strictly positive value (Hugging Face-style
    # generate() rejects 0), so clamp to the smallest slider step.
    temperature = max(features['t'], 1) / 100.0

    # The sidebar slider reports top-p as a percentage, while nucleus sampling
    # expects a probability. Values already in [0, 1] are passed through.
    top_p = features['top_p']
    if top_p > 1:
        top_p = top_p / 100.0

    prompt = "<|startoftext|> <|titlestart|>{}<|titleend|>".format(text)
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    sample_outputs = model.generate(
        input_ids,
        do_sample=True,
        top_k=50,
        max_length=300,
        top_p=top_p,
        temperature=temperature,
        num_return_sequences=num_comments,
    )

    for sample_output in sample_outputs:
        decoded = tokenizer.decode(sample_output, skip_special_tokens=True)
        # Drop the echoed title. Splitting only at the first occurrence keeps
        # comments that mention the title again intact; when the title is
        # empty or is not echoed verbatim, show the whole decoded text instead
        # of failing.
        if text:
            _, title_found, comment = decoded.partition(text)
            if not title_found:
                comment = decoded
        else:
            comment = decoded
        st.write(comment)


def load_model():
    """Load the serialized tokenizer and model from the working directory.

    Reads ``./tokenizer.pt`` and ``./model.pt`` with ``torch.load``; the model
    is mapped onto the CPU device.

    NOTE: ``torch.load`` unpickles the files, so they must come from a
    trusted source.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    cpu_device = torch.device('cpu')
    loaded_tokenizer = torch.load('./tokenizer.pt')
    loaded_model = torch.load('./model.pt', map_location=cpu_device)
    return loaded_tokenizer, loaded_model


def _fetch_video_title(url, timeout=10):
    """Return the title of a YouTube video using the oEmbed endpoint.

    Args:
        url: Address of the video page.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        The value of the ``title`` field of the oEmbed JSON response.

    Raises:
        OSError: The request failed (``urllib.error.URLError`` and
            ``HTTPError`` are subclasses of ``OSError``).
        ValueError: The response body is not decodable JSON.
        KeyError: The response has no ``title`` field.
    """
    # A bare ``import urllib`` does not guarantee that the submodules are
    # loaded, so import the ones used here explicitly.
    import urllib.parse
    import urllib.request

    query_string = urllib.parse.urlencode({"format": "json", "url": url})
    request_url = "https://www.youtube.com/oembed" + "?" + query_string
    with urllib.request.urlopen(request_url, timeout=timeout) as response:
        data = json.loads(response.read().decode())
    return data['title']


def main():
    """Render the Streamlit page: sidebar controls, URL form and comments.

    Reads the sampling settings from the sidebar sliders, asks for a YouTube
    URL, looks up the video title and passes it to ``generate``. When no URL
    has been entered yet, or the title lookup fails, a message is shown and
    nothing is generated.
    """
    st.title("YouTube comments generating project")
    st.header('YouTube comments generator')

    st.sidebar.title("Features")

    # Slider values are forwarded to generate() as-is.
    features = {
        "num": st.sidebar.slider("Количество комментариев", 0, 20, 1, 1),
        "t": st.sidebar.slider("Температура", 0, 300, 180, 1),
        "top_p": st.sidebar.slider("Top-p", 0, 100, 95, 5),
    }

    st.sidebar.title("Note")
    st.sidebar.write(
        "\n\tИзменяя значения, можно получить различные выводы модели\n        "
    )
    st.sidebar.write(
        "\n\tЗначение температуры делится на 100\n        "
    )
    st.sidebar.caption(f"Streamlit version `{st.__version__}`")
    with st.form(key='my_form'):
        url = st.text_input('Введите url видео на YouTube')
        st.form_submit_button('Готово!')

    if not url:
        # First render, or the form was submitted empty: there is no video to
        # look up, so do not send a request.
        st.info('Введите ссылку на видео и нажмите «Готово!»')
        return

    try:
        title = _fetch_video_title(url)
    except (OSError, ValueError, KeyError) as err:
        st.error(f'Не удалось получить данные о видео: {err}')
        return

    st.write('Video Title: ' + title)
    st.video(url)

    # Load the model only once there is something to generate for.
    tokenizer, model = load_model()
    generate(tokenizer, model, title, features)

# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()