Add files via upload

hope-data-science · web-flow · commit f9f6625498fa · 2020-02-14T06:43:45.000+08:00
diff --git a/R/doc_group.R b/R/doc_group.R
@@ -0,0 +1,40 @@
+
+#' @title Construct network of documents based on keyword co-occurrence
+#'
+#' @description Create a \code{tbl_graph} (a class provided by \pkg{tidygraph}) from the tidy table with document ID and keyword.
+#' Each entry (row) should contain only one document and keyword in the tidy format. This function
+#' groups the documents.
+#' @param dt A data.frame containing at least two columns with document ID and keyword.
+#' @param id Quoted characters specifying the column name of document ID. Default uses "id".
+#' @param keyword Quoted characters specifying the column name of keyword. Default uses "keyword".
+#' @param com_detect_fun Community detection function, provided by \pkg{tidygraph} (wrappers around clustering
+#' functions provided by \pkg{igraph}), see \code{\link[tidygraph]{group_graph}} to find other optional algorithms.
+#' Default uses \code{\link[tidygraph]{group_fast_greedy}}.
+#' @return A tbl_graph, representing the document relation network based on
+#' keyword co-occurrence.
+#' @details As we could classify keywords using document ID, we could also
+#' classify documents with keywords. In the output network, the nodes are documents
+#' and an edge means that the two documents share the same keyword(s).
+#' @examples
+#'  library(akc)
+#'  bibli_data_table %>%
+#'    keyword_clean(id = "id",keyword = "keyword") %>%
+#'    doc_group(id = "id",keyword = "keyword") -> grouped_doc
+#'
+#'  grouped_doc
+
+
+#' @export
+doc_group = function(dt,id = "id",keyword = "keyword",
+                     com_detect_fun = group_fast_greedy){
+  dt %>%
+    as_tibble() %>%
+    transmute(id = .data[[id]],keyword = .data[[keyword]]) %>%
+    pairwise_count(id,keyword,upper = FALSE) %>%
+    graph_from_data_frame(directed = FALSE) %>%
+    as_tbl_graph() %>%
+    mutate(group = com_detect_fun()) %>%
+    rename(id = name)
+}
+
+
diff --git a/R/keyword_cloud.R b/R/keyword_cloud.R
@@ -4,6 +4,8 @@
 #' @description This function should be used to plot the object exported by
 #' \code{\link[akc]{keyword_group}}. It could draw a robust word cloud of keywords.
 #' @param tibble_graph A \code{tbl_graph} output by \code{\link[akc]{keyword_group}}.
+#' @param group_no If one wants to visualize a specific group, give the group number.
+#' Default uses \code{NULL}, which returns all the groups.
 #' @param top How many top keywords (by frequency) should be plot? Default uses 50.
 #' @param max_size Size of largest keyword.Default uses 20.
 #' @details In the output graph, the size of keywords is proportional to the keyword
@@ -25,9 +27,13 @@
 #'
 #' grouped_keyword %>%
 #'   keyword_cloud()
+#'
+#' grouped_keyword %>%
+#'   keyword_cloud(group_no = 1)
 
-keyword_cloud = function(tibble_graph,top = 50,max_size = 20){
-  tibble_graph %>%
+keyword_cloud = function(tibble_graph,group_no = NULL,top = 50,max_size = 20){
+  if(is.null(group_no))
+    tibble_graph %>%
     as_tibble() %>%
     top_n(top,freq) %>%
     mutate(group = as.factor(group)) %>%
@@ -36,6 +42,16 @@ keyword_cloud = function(tibble_graph,top = 50,max_size = 20){
     scale_size_area(max_size = max_size) +
     scale_x_discrete(breaks = NULL,name = "") +
     theme_minimal()
+  else
+    tibble_graph %>%
+    as_tibble() %>%
+    filter(group == group_no) %>%
+    top_n(top,freq) %>%
+    ggplot(aes(label = name,size = freq)) +
+    geom_text_wordcloud_area() +
+    scale_size_area(max_size = max_size) +
+    scale_x_discrete(breaks = NULL,name = "") +
+    theme_minimal()
 }
 
 
diff --git a/R/keyword_network.R b/R/keyword_network.R
@@ -28,6 +28,12 @@
 #'    keyword_group(id = "id",keyword = "keyword") %>%
 #'    keyword_network()
 #'
+#' # use color with `scale_fill_`
+#'  bibli_data_table %>%
+#'    keyword_clean(id = "id",keyword = "keyword") %>%
+#'    keyword_group(id = "id",keyword = "keyword") %>%
+#'    keyword_network() + ggplot2::scale_fill_viridis_d()
+#'
 #'  # without facet
 #'  bibli_data_table %>%
 #'    keyword_clean(id = "id",keyword = "keyword") %>%