Made output of nearest neighbours downloadable
Browse files
- .gitignore +1 -0
- app.py +20 -4
- word2vec.py +15 -0
    	
        .gitignore
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            downloads
         | 
    	
        app.py
    CHANGED
    
    | @@ -39,8 +39,7 @@ if active_tab == "Nearest neighbours": | |
| 39 | 
             
                        elif time_slice == 'Late Roman':
         | 
| 40 | 
             
                            time_slice = 'late_roman'
         | 
| 41 |  | 
| 42 | 
            -
                        time_slice = time_slice.lower() + "_cbow"
         | 
| 43 | 
            -
                        
         | 
| 44 |  | 
| 45 |  | 
| 46 | 
             
                        # Check if all fields are filled in
         | 
| @@ -56,8 +55,25 @@ if active_tab == "Nearest neighbours": | |
| 56 | 
             
                                nearest_neighbours,
         | 
| 57 | 
             
                                columns=["Word", "Time slice", "Similarity"],
         | 
| 58 | 
             
                                index = range(1, len(nearest_neighbours) + 1)
         | 
| 59 | 
            -
                            )
         | 
| 60 | 
            -
                            st.table(df)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 61 |  | 
| 62 |  | 
| 63 | 
             
            # Cosine similarity tab
         | 
|  | |
| 39 | 
             
                        elif time_slice == 'Late Roman':
         | 
| 40 | 
             
                            time_slice = 'late_roman'
         | 
| 41 |  | 
| 42 | 
            +
                        time_slice = time_slice.lower() + "_cbow"           
         | 
|  | |
| 43 |  | 
| 44 |  | 
| 45 | 
             
                        # Check if all fields are filled in
         | 
|  | |
| 55 | 
             
                                nearest_neighbours,
         | 
| 56 | 
             
                                columns=["Word", "Time slice", "Similarity"],
         | 
| 57 | 
             
                                index = range(1, len(nearest_neighbours) + 1)
         | 
| 58 | 
            +
                            )              
         | 
| 59 | 
            +
                            st.table(df)             
         | 
| 60 | 
            +
                            
         | 
| 61 | 
            +
                            
         | 
| 62 | 
            +
                            # Store content in a temporary file
         | 
| 63 | 
            +
                            tmp_file = store_df_in_temp_file(df)
         | 
| 64 | 
            +
                            
         | 
| 65 | 
            +
                            # Open the temporary file and read its content
         | 
| 66 | 
            +
                            with open(tmp_file, "rb") as file:
         | 
| 67 | 
            +
                                file_byte = file.read()
         | 
| 68 | 
            +
                                
         | 
| 69 | 
            +
                                # Create download button
         | 
| 70 | 
            +
                                st.download_button(
         | 
| 71 | 
            +
                                    "Download results",
         | 
| 72 | 
            +
                                    data=file_byte,
         | 
| 73 | 
            +
                                    file_name = f'nearest_neighbours_{word}_{time_slice}.xlsx',
         | 
| 74 | 
            +
                                    mime='application/octet-stream'
         | 
| 75 | 
            +
                                    )
         | 
| 76 | 
            +
             | 
| 77 |  | 
| 78 |  | 
| 79 | 
             
            # Cosine similarity tab
         | 
    	
        word2vec.py
    CHANGED
    
    | @@ -2,6 +2,7 @@ from gensim.models import Word2Vec | |
| 2 | 
             
            from collections import defaultdict
         | 
| 3 | 
             
            import os
         | 
| 4 | 
             
            import tempfile
         | 
|  | |
| 5 |  | 
| 6 |  | 
| 7 | 
             
            def load_all_models():
         | 
| @@ -249,6 +250,20 @@ def write_to_file(data): | |
| 249 | 
             
                return temp_file_path
         | 
| 250 |  | 
| 251 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 252 | 
             
            def main():
         | 
| 253 | 
             
                # model = load_word2vec_model('models/archaic_cbow.model')
         | 
| 254 | 
             
                # archaic_cbow_dict = model_dictionary(model)
         | 
|  | |
| 2 | 
             
            from collections import defaultdict
         | 
| 3 | 
             
            import os
         | 
| 4 | 
             
            import tempfile
         | 
| 5 | 
            +
            import pandas as pd
         | 
| 6 |  | 
| 7 |  | 
| 8 | 
             
            def load_all_models():
         | 
|  | |
| 250 | 
             
                return temp_file_path
         | 
| 251 |  | 
| 252 |  | 
| 253 | 
            +
            def store_df_in_temp_file(df):
                '''
                    Store the dataframe in a temporary Excel file

                    The file is created with a unique name (prefix "temp_", suffix
                    ".xlsx") inside "./downloads/nn" and is written with the
                    xlsxwriter engine, without the dataframe index.

                    df: the pandas dataframe to write
                    returns: the path of the file that was written; the file is not
                    removed here, so cleaning it up is left to the caller
                '''
                download_dir = "./downloads/nn"

                # The downloads folder is git-ignored, so it may be missing on a
                # fresh checkout; mkstemp would fail with FileNotFoundError then
                os.makedirs(download_dir, exist_ok=True)

                # Create random tmp file name
                fd, temp_file_path = tempfile.mkstemp(prefix="temp_", suffix=".xlsx", dir=download_dir)

                # mkstemp hands back an already open OS-level file handle; close it
                # right away so that no file descriptor is leaked on every call
                os.close(fd)

                # Write data to the temporary file
                with pd.ExcelWriter(temp_file_path, engine='xlsxwriter') as writer:
                    df.to_excel(writer, index=False)

                return temp_file_path
         | 
| 265 | 
            +
             | 
| 266 | 
            +
             | 
| 267 | 
             
            def main():
         | 
| 268 | 
             
                # model = load_word2vec_model('models/archaic_cbow.model')
         | 
| 269 | 
             
                # archaic_cbow_dict = model_dictionary(model)
         | 
