Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import pandas as pd | |
| import nltk | |
| import re | |
| import torch | |
| import networkx as nx | |
| from tqdm import tqdm | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import warnings | |
# Silence library warnings process-wide so they do not clutter the app output.
warnings.filterwarnings('ignore')

# Sentence tokenizer data for nltk.sent_tokenize. Newer NLTK releases load the
# 'punkt_tab' resource instead of the pickled 'punkt' one, so fetch both: on a
# release that does not know one of the names, nltk.download() just reports the
# failure and returns False rather than raising.
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource)

# Sentence-embedding model shared by get_summary(); run it on the GPU when one
# is available, otherwise on the CPU.
model = SentenceTransformer('all-mpnet-base-v2')
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
def get_summary(text, num_words: int = 1000):
    """Return an extractive summary of *text* using fewer than *num_words* words.

    The text is split into sentences, each sentence is embedded with the
    module-level ``model``, and PageRank is run over the cosine-similarity
    graph of those embeddings. Sentences are then taken in descending score
    order until the next one would bring the running word count to
    ``num_words`` or more; the kept sentences are returned in their original
    document order, joined by single spaces.

    Args:
        text: The document to summarise.
        num_words: Word budget (whitespace-separated tokens). The summary
            always contains strictly fewer words than this.

    Returns:
        The summary string; ``""`` when *text* is empty or blank, or when the
        top-ranked sentence alone does not fit in the budget.

    Raises:
        ValueError: If the similarity matrix cannot be computed from the
            sentence embeddings.
    """
    # Blank input has no sentences to rank; return early instead of letting
    # the similarity computation fail on an empty embedding array.
    if not text or not text.strip():
        return ""

    sentences = nltk.sent_tokenize(text)
    if not sentences:
        return ""

    embeddings = model.encode(sentences, show_progress_bar=False)
    try:
        sim_matrix = cosine_similarity(embeddings)
    except ValueError as err:
        # Surface the real cause (with the offending shape) rather than
        # continuing with an unbound similarity matrix.
        raise ValueError(
            "could not compute sentence similarities for embeddings of shape "
            f"{getattr(embeddings, 'shape', None)}"
        ) from err

    scores = nx.pagerank(nx.from_numpy_array(sim_matrix))

    # Highest score first; ties fall back to sentence text, then position.
    ranked_sentences = sorted(
        ((scores[i], sentence, i) for i, sentence in enumerate(sentences)),
        reverse=True,
    )

    selected = []
    total_length = 0
    for _score, sentence, position in ranked_sentences:
        total_length += len(sentence.split())
        # Stop at the first sentence that exhausts the budget; lower-ranked
        # (possibly shorter) sentences are deliberately not considered.
        if total_length >= num_words:
            break
        selected.append((position, sentence))

    # Restore document order so the summary reads naturally.
    selected.sort(key=lambda item: item[0])
    return " ".join(sentence for _, sentence in selected)