m7n committed on
Commit
2395603
·
1 Parent(s): 7809ddd

drop duplicates

Browse files
Files changed (1) hide show
  1. openalex_utils.py +1 -0
openalex_utils.py CHANGED
@@ -97,6 +97,7 @@ def process_records_to_df(records):
97
  records_df['parsed_publication'] = records_df['parsed_publication'].fillna(' ')
98
  records_df['abstract'] = records_df['abstract'].fillna(' ')
99
  records_df['title'] = records_df['title'].fillna(' ')
 
100
 
101
  return records_df
102
 
 
97
  records_df['parsed_publication'] = records_df['parsed_publication'].fillna(' ')
98
  records_df['abstract'] = records_df['abstract'].fillna(' ')
99
  records_df['title'] = records_df['title'].fillna(' ')
100
+ records_df = records_df.drop_duplicates(subset=['id']).reset_index(drop=True)
101
 
102
  return records_df
103