File size: 2,134 Bytes
d5761e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import logging
import timeit
import json
import os
import torch
import streamlit as st
# This must stay above the transformers import so that models are downloaded into ./cache (relative to the current working directory)
os.environ['TRANSFORMERS_CACHE'] = os.curdir + '/cache'
from transformers import pipeline


# Root logger setup: append INFO-and-above records to llm.log, one line per
# record in the form "timestamp - logger name - level - message".
logging.basicConfig(
    filename='llm.log',
    filemode='a',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


@st.cache_resource
def init():
    """Build the summarization, spam-detection and tag-generation pipelines.

    The function is wrapped in ``st.cache_resource`` so the (expensive) model
    loading is not repeated on every call.

    Returns:
        A list ``[summarizer, detector, tagger]`` of ``transformers``
        pipelines, in that order.
    """
    # Resolve the device once and hand it to every pipeline so all three
    # models run on the same hardware: GPU 0 when CUDA is available,
    # otherwise the CPU (-1).
    device = 0 if torch.cuda.is_available() else -1

    summarizer = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        use_fast=True,
        device=device,
    )
    detector = pipeline(
        "text-classification",
        model="1aurent/distilbert-base-multilingual-cased-finetuned-email-spam",
        use_fast=True,
        device=device,
    )
    tagger = pipeline(
        "text2text-generation",
        model="fabiochiu/t5-base-tag-generation",
        use_fast=True,
        device=device,
    )
    return [summarizer, detector, tagger]


def summarize(prompt, summarizer):
    """Summarize *prompt* and log both the result and the time it took.

    Args:
        prompt: Text to summarize. Only its first 2048 characters are passed
            to the summarizer.
        summarizer: Callable invoked as ``summarizer(text, truncation=True)``.

    Returns:
        Whatever ``summarizer`` returns, unchanged.
    """
    start = timeit.default_timer()
    summarized = summarizer(prompt[:2048], truncation=True)
    elapsed = timeit.default_timer() - start

    # Pass values as lazy %-style arguments so the message is only formatted
    # when an INFO record is actually emitted; the resulting text is the same.
    logging.info("Summary: %s", summarized)
    logging.info("Time taken to summarize: %s", elapsed)

    return summarized


def detect_spam(prompt, detector):
    """Return the label the detector assigns to *prompt*.

    Only the first 2048 characters of ``prompt`` are classified. The detector
    is called as ``detector(text, truncation=True)`` and the ``'label'`` entry
    of its first result is returned.
    """
    text = prompt[:2048]
    return detector(text, truncation=True)[0]['label']


def get_tags(prompt, tagger):
    """Run the tagger on *prompt* and return its raw output.

    Only the first 2048 characters of ``prompt`` are passed on; the tagger is
    called as ``tagger(text, truncation=True)``.
    """
    return tagger(prompt[:2048], truncation=True)


# if __name__ == "__main__":
#   llm = Summarizer()

#   summary = llm.summarize("""
# image.png


# Job Chahiye!?!?

# GDSC is here with another fantastic event
# DSA Busted
# This event will teach you about DATA STRUCTURES AND ALGORITHMS, as well as how to tackle coding rounds.
# Every Saturday, we will have live doubt sessions.
# Every Sunday, we will have a quiz.
# CERTIFICATE and  Exciting GOODIES from GOOGLE.

# So, don't pass up this excellent opportunity to begin or fast track your placement preparations.

# """)
#   print(summary)