|
| 1 | + |
#' @title Construct network of documents based on keyword co-occurrence
#'
#' @description Build a \code{tbl_graph} (a class provided by \pkg{tidygraph})
#' from a tidy table of document IDs and keywords, then assign each document
#' to a group with a community detection function. Every row of the input
#' should hold exactly one document-keyword pair (tidy format).
#' @param dt A data.frame containing at least two columns: one with the
#' document ID and one with the keyword.
#' @param id A character string giving the name of the document ID column.
#' Defaults to "id".
#' @param keyword A character string giving the name of the keyword column.
#' Defaults to "keyword".
#' @param com_detect_fun Community detection function provided by
#' \pkg{tidygraph} (wrappers around clustering functions provided by
#' \pkg{igraph}); see \code{\link[tidygraph]{group_graph}} for the other
#' available algorithms.
#' Defaults to \code{\link[tidygraph]{group_fast_greedy}}.
#' @return A \code{tbl_graph} representing the document relation network
#' based on keyword co-occurrence. Its nodes carry the columns \code{id}
#' (the document ID) and \code{group} (the result of \code{com_detect_fun}).
#' @details Just as keywords can be classified through the documents they
#' appear in, documents can be classified through the keywords they contain.
#' In the output network the nodes are documents, and an edge between two
#' documents means that they share at least one keyword.
#' @examples
#' library(akc)
#' grouped_doc <- bibli_data_table %>%
#'   keyword_clean(id = "id", keyword = "keyword") %>%
#'   doc_group(id = "id", keyword = "keyword")
#'
#' grouped_doc
#' @export
doc_group <- function(dt, id = "id", keyword = "keyword",
                      com_detect_fun = group_fast_greedy) {
  # Reduce the input to the two columns of interest under fixed names, so the
  # following steps do not depend on the caller's column names.
  doc_keyword <- transmute(
    as_tibble(dt),
    id = .data[[id]],
    keyword = .data[[keyword]]
  )

  # Count, for every pair of documents, how many keywords they share.
  # upper = FALSE leaves out the mirrored duplicate of each pair.
  doc_pairs <- pairwise_count(doc_keyword, id, keyword, upper = FALSE)

  # The pair table becomes the edge list of an undirected graph.
  doc_net <- as_tbl_graph(graph_from_data_frame(doc_pairs, directed = FALSE))

  # Label every node with its community, then expose the node name as `id`.
  doc_net <- mutate(doc_net, group = com_detect_fun())
  rename(doc_net, id = name)
}
| 39 | + |
| 40 | + |
0 commit comments