Skip to content

Commit f9f6625

Browse files
Add files via upload
1 parent 92c14c9 commit f9f6625

File tree

3 files changed

+64
-2
lines changed

3 files changed

+64
-2
lines changed

R/doc_group.R

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
2+
#' @title Construct network of documents based on keyword co-occurrence
3+
#'
4+
#' @description Create a \code{tbl_graph} (a class provided by \pkg{tidygraph}) from a tidy table with document ID and keyword.
5+
#' Each entry (row) should contain only one document and keyword in the tidy format. This function would
6+
#' group the documents.
7+
#' @param dt A data.frame containing at least two columns with document ID and keyword.
8+
#' @param id Quoted characters specifying the column name of document ID. Default uses "id".
9+
#' @param keyword Quoted characters specifying the column name of keyword. Default uses "keyword".
10+
#' @param com_detect_fun Community detection function, provided by \pkg{tidygraph} (wrappers around clustering
11+
#' functions provided by \pkg{igraph}), see \code{\link[tidygraph]{group_graph}} to find other optional algorithms.
12+
#' Default uses \code{\link[tidygraph]{group_fast_greedy}}.
13+
#' @return A tbl_graph, representing the document relation network based on
14+
#' keyword co-occurrence.
15+
#' @details As we could classify keywords using document ID, we could also
16+
#' classify documents with keywords. In the output network, the nodes are documents
17+
#' and an edge means that the two documents share the same keywords with each other.
18+
#' @examples
19+
#' library(akc)
20+
#' bibli_data_table %>%
21+
#' keyword_clean(id = "id",keyword = "keyword") %>%
22+
#' doc_group(id = "id",keyword = "keyword") -> grouped_doc
23+
#'
24+
#' grouped_doc
25+
26+
27+
#' @export
28+
doc_group <- function(dt, id = "id", keyword = "keyword",
                      com_detect_fun = group_fast_greedy) {
  # Reduce the input to the two columns of interest under the fixed names
  # `id` and `keyword`, so the later steps do not depend on how the caller's
  # columns are called.
  doc_keyword <- transmute(
    as_tibble(dt),
    id = .data[[id]],
    keyword = .data[[keyword]]
  )

  # Pairwise counts between documents (`id`) across `keyword`;
  # `upper = FALSE` is passed so that each pair is not reported in both
  # orders.
  doc_pairs <- pairwise_count(doc_keyword, id, keyword, upper = FALSE)

  # The pair table serves as the edge list of an undirected graph, which is
  # then converted to a tbl_graph.
  doc_graph <- as_tbl_graph(
    graph_from_data_frame(doc_pairs, directed = FALSE)
  )

  # `com_detect_fun` is called without arguments inside mutate(); its result
  # is stored in the `group` column, and the `name` column is renamed to `id`.
  doc_graph %>%
    mutate(group = com_detect_fun()) %>%
    rename(id = name)
}
39+
40+

R/keyword_cloud.R

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
#' @description This function should be used to plot the object exported by
55
#' \code{\link[akc]{keyword_group}}. It could draw a robust word cloud of keywords.
66
#' @param tibble_graph A \code{tbl_graph} output by \code{\link[akc]{keyword_group}}.
7+
#' @param group_no If one wants to visualize a specific group, give the group number.
8+
#' Default uses \code{NULL}, which returns all the groups.
79
#' @param top How many top keywords (by frequency) should be plotted? Default uses 50.
810
#' @param max_size Size of largest keyword. Default uses 20.
911
#' @details In the output graph, the size of keywords is proportional to the keyword
@@ -25,9 +27,13 @@
2527
#'
2628
#' grouped_keyword %>%
2729
#' keyword_cloud()
30+
#'
31+
#' grouped_keyword %>%
32+
#' keyword_cloud(group_no = 1)
2833

29-
keyword_cloud = function(tibble_graph,top = 50,max_size = 20){
30-
tibble_graph %>%
34+
keyword_cloud = function(tibble_graph,group_no = NULL,top = 50,max_size = 20){
35+
if(is.null(group_no))
36+
tibble_graph %>%
3137
as_tibble() %>%
3238
top_n(top,freq) %>%
3339
mutate(group = as.factor(group)) %>%
@@ -36,6 +42,16 @@ keyword_cloud = function(tibble_graph,top = 50,max_size = 20){
3642
scale_size_area(max_size = max_size) +
3743
scale_x_discrete(breaks = NULL,name = "") +
3844
theme_minimal()
45+
else
46+
tibble_graph %>%
47+
as_tibble() %>%
48+
filter(group == group_no) %>%
49+
top_n(top,freq) %>%
50+
ggplot(aes(label = name,size = freq)) +
51+
geom_text_wordcloud_area() +
52+
scale_size_area(max_size = max_size) +
53+
scale_x_discrete(breaks = NULL,name = "") +
54+
theme_minimal()
3955
}
4056

4157

R/keyword_network.R

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@
2828
#' keyword_group(id = "id",keyword = "keyword") %>%
2929
#' keyword_network()
3030
#'
31+
#' # use color with `scale_fill_`
32+
#' bibli_data_table %>%
33+
#' keyword_clean(id = "id",keyword = "keyword") %>%
34+
#' keyword_group(id = "id",keyword = "keyword") %>%
35+
#' keyword_network() + ggplot2::scale_fill_viridis_d()
36+
#'
3137
#' # without facet
3238
#' bibli_data_table %>%
3339
#' keyword_clean(id = "id",keyword = "keyword") %>%

0 commit comments

Comments
 (0)