Mark7549 committed on
Commit
5a91b2b
·
1 Parent(s): 17c5755

Removed vector_graph.py; its functions were no longer used

Browse files
Files changed (2) hide show
  1. app.py +0 -1
  2. vector_graph.py +0 -72
app.py CHANGED
@@ -3,7 +3,6 @@ from streamlit_option_menu import option_menu
3
  from word2vec import *
4
  import pandas as pd
5
  from autocomplete import *
6
- from vector_graph import *
7
  from plots import *
8
  from lsj_dict import *
9
  import json
 
3
  from word2vec import *
4
  import pandas as pd
5
  from autocomplete import *
 
6
  from plots import *
7
  from lsj_dict import *
8
  import json
vector_graph.py DELETED
@@ -1,72 +0,0 @@
1
- from word2vec import *
2
- import numpy as np
3
- from sklearn.decomposition import PCA
4
- from sklearn.preprocessing import StandardScaler
5
- import pandas as pd
6
-
7
-
8
-
9
- def create_3d_vectors(word, time_slice, nearest_neighbours_vectors):
10
- """
11
- Turn word vectors into 3D vectors
12
- """
13
- model = load_word2vec_model(f'models/{time_slice}.model')
14
-
15
- # Compress all vectors to 3D
16
- model_df = pd.DataFrame(model.wv.vectors)
17
- pca_vectors = PCA(n_components=3)
18
- pca_model = pca_vectors.fit_transform(model_df)
19
- pca_model_df = pd.DataFrame(
20
- data = pca_model,
21
- columns = ['x', 'y', 'z']
22
- )
23
- pca_model_df.insert(0, 'word', model.wv.index_to_key)
24
-
25
- return pca_model_df
26
-
27
-
28
-
29
-
30
- def create_3d_models(time_slice):
31
- """
32
- Create 3D models for each time slice
33
- """
34
- time_slice_model = convert_time_name_to_model(time_slice)
35
- model = load_word2vec_model(f'models/{time_slice_model}.model')
36
-
37
- # Compress all vectors to 3D
38
- model_df = pd.DataFrame(model.wv.vectors)
39
- pca_vectors = PCA(n_components=3)
40
- pca_model = pca_vectors.fit_transform(model_df)
41
- pca_model_df = pd.DataFrame(
42
- data = pca_model,
43
- columns = ['x', 'y', 'z']
44
- )
45
-
46
- pca_model_df.insert(0, 'word', model.wv.index_to_key)
47
-
48
- pca_model_df.to_csv(f'3d_models/{time_slice}_3d.csv', index=False)
49
- return pca_model_df, pca_vectors
50
-
51
-
52
- def nearest_neighbours_to_pca_vectors(word, time_slice, nearest_neighbours_vectors):
53
- """
54
- Turn nearest neighbours into 3D vectors
55
- """
56
- model_df = pd.read_csv(f'3d_models/{time_slice}_3d.csv')
57
-
58
- new_data = []
59
-
60
- # Get the word vector for the nearest neighbours
61
- for neighbour in nearest_neighbours_vectors:
62
- word = neighbour[0]
63
- cosine_sim = neighbour[3]
64
- vector_3d = model_df[model_df['word'] == word][['x', 'y', 'z']].values[0]
65
-
66
- # Add word, cosine_sim and 3D vector to new data list
67
- new_data.append({'word': word, 'cosine_sim': cosine_sim, '3d_vector': vector_3d})
68
-
69
- # Convert the list of dictionaries to a DataFrame
70
- new_df = pd.DataFrame(new_data)
71
-
72
- return new_df