Ifeanyi committed on
Commit
bd79643
·
1 Parent(s): e76e0af

Delete OpenAlexEdges.R

Browse files
Files changed (1) hide show
  1. OpenAlexEdges.R +0 -132
OpenAlexEdges.R DELETED
@@ -1,132 +0,0 @@
1
- authorPubEdges <- function(keywords,pub_start_date,pub_end_date){
2
-
3
- keywords <- keywords
4
- pub_start_date <- pub_start_date
5
- pub_end_date <- pub_end_date
6
-
7
- # create search engine function
8
- search_engine <- function(keywords,pub_start_date,pub_end_date){
9
- suppressPackageStartupMessages(library(openalexR))
10
- suppressPackageStartupMessages(library(tidyverse))
11
-
12
- options(openalexR.mailto = "[email protected]")
13
-
14
- # search engine
15
- works_search <- oa_fetch(
16
- entity = "works",
17
- title.search = c(keywords),
18
- cited_by_count = ">50",
19
- from_publication_date = pub_start_date,
20
- to_publication_date = pub_end_date,
21
- options = list(sort = "cited_by_count:desc"),
22
- verbose = FALSE
23
- )
24
-
25
- return(works_search)
26
-
27
- }
28
-
29
- # import nodes function
30
- source("openAlexNodes.R")
31
-
32
- # run author nodes function
33
- author_nodes <- authorPubNodes(keywords,pub_start_date,pub_end_date)
34
-
35
- # run search engine
36
- search_data <- search_engine(keywords,pub_start_date,pub_end_date)
37
-
38
-
39
- # grab authors and group them according to collaboration
40
- authors_collaboration_groups <- list()
41
- for (i in 1:nrow(search_data)){
42
- authors_collaboration_groups[[i]] <- search_data$author[[i]][2]
43
- }
44
-
45
- # grab all authors
46
- all_authors <- c()
47
- for (i in 1:length(authors_collaboration_groups)) {
48
- all_authors <- c(all_authors,authors_collaboration_groups[[i]][[1]])
49
- }
50
-
51
- # get length of each authors collaboration
52
- authors_length <- c()
53
- for(authors in 1:length(authors_collaboration_groups)){
54
- authors_length <- c(authors_length,authors_collaboration_groups[[authors]] |> nrow())
55
- }
56
-
57
- # grab all publications
58
- publications <- list()
59
- for (i in 1:nrow(search_data)){
60
- publications[[i]] <- rep(search_data$display_name[i], each = authors_length[i])
61
- }
62
-
63
- # place all publications in a vector
64
- all_publications <- c()
65
- for(i in 1:length(publications)){
66
- all_publications <- c(all_publications,publications[[i]])
67
- }
68
-
69
- # create author_to_publication data frame
70
- authors_to_publications <- data.frame(
71
- Authors = all_authors,
72
- Publications = all_publications
73
- )
74
-
75
- # create edges data frame
76
- author_publication_edges <- data.frame(
77
- Source = authors_to_publications$Authors,
78
- Target = authors_to_publications$Publications,
79
- Type = "directed",
80
- Weight = 1.0
81
- )
82
-
83
-
84
- # replace edges with id from nodes data set
85
- replace_edges_with_ids <- function(author_edges, author_nodes) {
86
- # Create a lookup table for node values to their corresponding Ids
87
- node_lookup <- setNames(author_nodes$Id, author_nodes$Node)
88
-
89
- # Use the lookup table to replace Source and Target values in author_edges
90
- author_edges$Source <- node_lookup[author_edges$Source]
91
- author_edges$Target <- node_lookup[author_edges$Target]
92
-
93
- return(author_edges)
94
- }
95
-
96
- # Call the function with your data frames
97
- author_publication_edges <- replace_edges_with_ids(author_publication_edges, nodes_df)
98
-
99
- return(author_publication_edges)
100
-
101
-
102
- }
103
-
104
- # tictoc::tic()
105
- # authorPub_data <- search_engine(keywords = c("data","deep learning","analytics"),
106
- # pub_start_date = "2021-01-01",
107
- # pub_end_date = "2022-12-31")
108
- # tictoc::toc()
109
- #
110
- #
111
- # tictoc::tic()
112
- # author_nodes <- authorPubNodes(search_data = authorPub_data)
113
- # tictoc::toc()
114
- # author_nodes |> view()
115
- #
116
- # tictoc::tic()
117
- author_edges <- authorPubEdges(keywords = c("data","deep learning","analytics"),
118
- pub_start_date = "2021-01-01",
119
- pub_end_date = "2022-12-31")
120
- # tictoc::toc()
121
- #
122
- # author_edges |> view()
123
- #
124
- # write.csv(author_edges,file = "authorPubEdges2.csv")
125
-
126
- edges_pub <- authorPubEdges(keywords = c("data","deep learning","analytics"),
127
- pub_start_date = "2021-01-01",
128
- pub_end_date = "2022-12-31")
129
-
130
- edges_pub |> view()
131
-
132
- write.csv(edges_pub,file = "edges_pub.csv",row.names = F)