

我有一个数据集,想要既能整体可视化,又能按几个不同的变量分类可视化。我创建了一个 Flexdashboard,其中嵌入了一个 Shiny 应用来选择分类(disaggregation)方式,下面的代码可以正常运行并绘制出对应的子集。



---
title: "test"
# `theme` is an option of the flexdashboard output format, so it has to be
# nested under that format rather than directly under `title`.
output:
  flexdashboard::flex_dashboard:
    theme: bootstrap
runtime: shiny
---

```{r setup, include=FALSE}
# Attach the packages whose functions are called unqualified below.
library(flexdashboard)
library(dplyr)       # %>%, mutate, filter, select, sample_n, full_join, ...
library(tidyr)       # complete
library(tibbletime)  # as_tbl_time, collapse_by
library(xts)
library(dygraphs)
```

```{r global, include=FALSE}
  # Simulated records: one candidate row per day. The other columns are sampled
  # with length 181, so the date sequence must also have 181 elements, i.e.
  # 2018-01-01 through 2018-06-30 in daily steps.
  dat <- data.frame(date = seq(as.Date("2018-01-01"),
                               as.Date("2018-06-30"),
                               by = "day"),
                    sex = sample(c("male", "female"), 181, replace = TRUE),
                    lang = sample(c("english", "spanish"), 181, replace = TRUE),
                    age = sample(20:35, 181, replace = TRUE))
  # Keep a random 80 rows so that some days/weeks have no records.
  dat <- sample_n(dat, 80)
```

Sidebar {.sidebar}

  # Sidebar control choosing how the series is split; the selected value is
  # read later as input$diss.
  radioButtons(
    inputId = "diss",
    label = "Disaggregation",
    choices = list("All" = 1, "By Sex" = 2, "By Language" = 3),
    selected = 1
  )

Page 1

# all
  # Weekly record counts over the whole data set (count column: `total`).
  all <- reactive({
    dat %>%
      mutate(new = 1) %>%
      arrange(date) %>%
      # time series analysis
      as_tbl_time(index = date) %>% # convert to tibble time object
      select(date, new) %>%
      collapse_by("1 week", side = "start", clean = TRUE) %>%
      group_by(date) %>%
      mutate(total = sum(new, na.rm = TRUE)) %>%
      distinct(date, .keep_all = TRUE) %>%
      ungroup() %>%
      # expand matrix to include weeks without data; seq() needs an end point
      # as well as `by`, so run from the first to the last observed week
      complete(date = seq(date[1], date[length(date)], by = "1 week"),
               fill = list(total = 0))
  })

# males only
  # Weekly record counts for rows with sex == "male" (count column: `total_m`).
  males <- reactive({
    dat %>%
      filter(sex == "male") %>%
      mutate(new = 1) %>%
      arrange(date) %>%
      # time series analysis
      as_tbl_time(index = date) %>%
      select(date, new) %>%
      collapse_by("1 week", side = "start", clean = TRUE) %>%
      group_by(date) %>%
      mutate(total_m = sum(new, na.rm = TRUE)) %>%
      distinct(date, .keep_all = TRUE) %>%
      ungroup() %>%
      # expand matrix to include weeks without data; seq() needs an end point
      # as well as `by`, so run from the first to the last observed week
      complete(date = seq(date[1], date[length(date)], by = "1 week"),
               fill = list(total_m = 0))
  })

# females only
  # Weekly record counts for rows with sex == "female" (count column: `total_f`).
  females <- reactive({
    dat %>%
      filter(sex == "female") %>%
      mutate(new = 1) %>%
      arrange(date) %>%
      # time series analysis
      as_tbl_time(index = date) %>%
      select(date, new) %>%
      collapse_by("1 week", side = "start", clean = TRUE) %>%
      group_by(date) %>%
      mutate(total_f = sum(new, na.rm = TRUE)) %>%
      distinct(date, .keep_all = TRUE) %>%
      ungroup() %>%
      # expand matrix to include weeks without data; seq() needs an end point
      # as well as `by`, so run from the first to the last observed week
      complete(date = seq(date[1], date[length(date)], by = "1 week"),
               fill = list(total_f = 0))
  })

# english only
  # Weekly record counts for rows with lang == "english" (count column: `total_e`).
  english <- reactive({
    dat %>%
      filter(lang == "english") %>%
      mutate(new = 1) %>%
      arrange(date) %>%
      # time series analysis
      as_tbl_time(index = date) %>%
      select(date, new) %>%
      collapse_by("1 week", side = "start", clean = TRUE) %>%
      group_by(date) %>%
      mutate(total_e = sum(new, na.rm = TRUE)) %>%
      distinct(date, .keep_all = TRUE) %>%
      ungroup() %>%
      # expand matrix to include weeks without data; seq() needs an end point
      # as well as `by`, so run from the first to the last observed week
      complete(date = seq(date[1], date[length(date)], by = "1 week"),
               fill = list(total_e = 0))
  })

# spanish only
  # Weekly record counts for rows with lang == "spanish" (count column: `total_s`).
  spanish <- reactive({
    dat %>%
      filter(lang == "spanish") %>%
      mutate(new = 1) %>%
      arrange(date) %>%
      # time series analysis
      as_tbl_time(index = date) %>%
      select(date, new) %>%
      collapse_by("1 week", side = "start", clean = TRUE) %>%
      group_by(date) %>%
      mutate(total_s = sum(new, na.rm = TRUE)) %>%
      distinct(date, .keep_all = TRUE) %>%
      ungroup() %>%
      # expand matrix to include weeks without data; seq() needs an end point
      # as well as `by`, so run from the first to the last observed week
      complete(date = seq(date[1], date[length(date)], by = "1 week"),
               fill = list(total_s = 0))
  })

# combine

  # One wide table keyed by week: the overall count plus one count column per
  # sex and per language subset. full_join keeps weeks present in any input.
  totals <- reactive({
    all() %>%
      select(date, total) %>%
      full_join(select(females(), date, total_f), by = "date") %>%
      full_join(select(males(), date, total_m), by = "date") %>%
      full_join(select(english(), date, total_e), by = "date") %>%
      full_join(select(spanish(), date, total_s), by = "date")
  })

# convert to xts
  # Time-indexed version of the combined table, for dygraph().
  totals_ <- reactive({
    wide <- totals()
    # The dates become the xts index only. xts stores its data as a single-typed
    # matrix, so leaving the Date column in would coerce every count to character.
    counts <- as.matrix(as.data.frame(wide)[setdiff(names(wide), "date")])
    xts(counts, order.by = wide$date)
  })

# plot

  # Reading input$diss and totals_() requires a reactive context, so the chart
  # is built inside renderDygraph(); it is redrawn whenever the radio selection
  # changes.
  renderDygraph({
    series <- totals_()

    # Pick the columns, title and legend labels for the selected view.
    graph <- if (input$diss == 1) {
      dygraph(series[, "total"], main = "All") %>%
        dySeries("total", label = "All")
    } else if (input$diss == 2) {
      dygraph(series[, c("total_f", "total_m")], main = "By sex") %>%
        dySeries("total_f", label = "Female") %>%
        dySeries("total_m", label = "Male")
    } else {
      dygraph(series[, c("total_e", "total_s")], main = "By language") %>%
        dySeries("total_e", label = "English") %>%
        dySeries("total_s", label = "Spanish")
    }

    # Display options shared by all three views.
    graph %>%
      dyRangeSelector() %>%
      dyOptions(useDataTimezone = FALSE,
                stepPlot = TRUE,
                drawGrid = FALSE,
                fillGraph = TRUE)
  })


@Jon Spring 建议编写一个函数来减少部分重复(已在下面的代码中应用),这是一个很好的改进。不过,基本思路仍然相同:拆分、计算、合并、绘图。有没有一种方法可以不必先把数据拆开再重新合并,就能做到这一点?

---
title: "test"
# `theme` is an option of the flexdashboard output format, so it has to be
# nested under that format rather than directly under `title`.
output:
  flexdashboard::flex_dashboard:
    theme: bootstrap
runtime: shiny
---

```{r setup, include=FALSE}
# Attach the packages whose functions are called unqualified below.
library(flexdashboard)
library(dplyr)     # %>%, mutate, filter, select, rename, sample_n, full_join
library(tidyr)     # complete
library(xts)
library(dygraphs)
```

```{r global, include=FALSE}
# generate data
  # One candidate row per day. The other columns are sampled with length 181,
  # so the date sequence must also have 181 elements, i.e. 2018-01-01 through
  # 2018-06-30 in daily steps.
  dat <- data.frame(date = seq(as.Date("2018-01-01"),
                               as.Date("2018-06-30"),
                               by = "day"),
                    sex = sample(c("male", "female"), 181, replace = TRUE),
                    lang = sample(c("english", "spanish"), 181, replace = TRUE),
                    age = sample(20:35, 181, replace = TRUE))
  # Keep a random 80 rows so that some days/weeks have no records.
  dat <- sample_n(dat, 80)

# Jon Spring's function
  # Collapse an (already filtered) data frame to one row per week.
  #
  # filtered_dat: data frame with a `date` column; every row counts as one record.
  # col_name:     name for the weekly count column (default "total").
  # Returns a table with `date` (week start), `new` and the count column,
  # including zero-count rows for weeks that had no records.
  prep_dat <- function(filtered_dat, col_name = "total") {
    weekly <- filtered_dat %>%
      mutate(new = 1) %>%
      arrange(date) %>%
      # time series analysis
      tibbletime::as_tbl_time(index = date) %>% # convert to tibble time object
      select(date, new) %>%
      tibbletime::collapse_by("1 week", side = "start", clean = TRUE) %>%
      group_by(date) %>%
      mutate(total = sum(new, na.rm = TRUE)) %>%
      distinct(date, .keep_all = TRUE) %>%
      ungroup() %>%
      # expand matrix to include weeks without data
      complete(
        date = seq(date[1], date[length(date)], by = "1 week"),
        fill = list(total = 0)
      )

    # Honour col_name; with the default this leaves the column as "total", so
    # callers that rename the column themselves keep working.
    names(weekly)[names(weekly) == "total"] <- col_name
    weekly
  }
```

Sidebar {.sidebar}

  # Sidebar control choosing how the series is split; the selected value is
  # read later as input$diss.
  radioButtons(
    inputId = "diss",
    label = "Disaggregation",
    choices = list("All" = 1, "By Sex" = 2, "By Language" = 3),
    selected = 1
  )

Page 1

# all
  # Weekly counts over the full data set; prep_dat() names the count column
  # `total`, which is what the combine step selects.
  all <- reactive(
    prep_dat(dat)
  )

# males only
  # Weekly counts for rows with sex == "male", in column `total_m`.
  # The whole pipeline lives inside reactive(): rename() must receive the
  # prepared data, not the reactive object itself.
  males <- reactive(
    dat %>%
      filter(sex == "male") %>%
      prep_dat() %>%
      rename("total_m" = "total")
  )

# females only
  # Weekly counts for rows with sex == "female", in column `total_f`.
  # The whole pipeline lives inside reactive(): rename() must receive the
  # prepared data, not the reactive object itself.
  females <- reactive(
    dat %>%
      filter(sex == "female") %>%
      prep_dat() %>%
      rename("total_f" = "total")
  )

# english only
  # Weekly counts for rows with lang == "english", in column `total_e`.
  # The whole pipeline lives inside reactive(): rename() must receive the
  # prepared data, not the reactive object itself.
  english <- reactive(
    dat %>%
      filter(lang == "english") %>%
      prep_dat() %>%
      rename("total_e" = "total")
  )

# spanish only
  # Weekly counts for rows with lang == "spanish", in column `total_s`.
  # The whole pipeline lives inside reactive(): rename() must receive the
  # prepared data, not the reactive object itself.
  spanish <- reactive(
    dat %>%
      filter(lang == "spanish") %>%
      prep_dat() %>%
      rename("total_s" = "total")
  )

# combine

  # One wide table keyed by week: the overall count plus one count column per
  # sex and per language subset. full_join keeps weeks present in any input.
  totals <- reactive({
    all() %>%
      select(date, total) %>%
      full_join(select(females(), date, total_f), by = "date") %>%
      full_join(select(males(), date, total_m), by = "date") %>%
      full_join(select(english(), date, total_e), by = "date") %>%
      full_join(select(spanish(), date, total_s), by = "date")
  })

# convert to xts
  # Time-indexed version of the combined table, for dygraph().
  totals_ <- reactive({
    wide <- totals()
    # The dates become the xts index only. xts stores its data as a single-typed
    # matrix, so leaving the Date column in would coerce every count to character.
    counts <- as.matrix(as.data.frame(wide)[setdiff(names(wide), "date")])
    xts(counts, order.by = wide$date)
  })

# plot

  # Reading input$diss and totals_() requires a reactive context, so the chart
  # is built inside renderDygraph(); it is redrawn whenever the radio selection
  # changes.
  renderDygraph({
    series <- totals_()

    # Pick the columns, title and legend labels for the selected view.
    graph <- if (input$diss == 1) {
      dygraph(series[, "total"], main = "All") %>%
        dySeries("total", label = "All")
    } else if (input$diss == 2) {
      dygraph(series[, c("total_f", "total_m")], main = "By sex") %>%
        dySeries("total_f", label = "Female") %>%
        dySeries("total_m", label = "Male")
    } else {
      dygraph(series[, c("total_e", "total_s")], main = "By language") %>%
        dySeries("total_e", label = "English") %>%
        dySeries("total_s", label = "Spanish")
    }

    # Display options shared by all three views.
    graph %>%
      dyRangeSelector() %>%
      dyOptions(useDataTimezone = FALSE,
                stepPlot = TRUE,
                drawGrid = FALSE,
                fillGraph = TRUE)
  })

感谢您详细解释您的目标。我认为 @simon-s-a 建议的方法会让事情简单很多:如果能动态地进行分组,并把代码组织成无需事先知道各个分组包含哪些取值的形式,维护起来就会容易得多。


  1. 先按日期(周)和所选的分组变量(无论是哪一个)进行分组,统计每组的行数,然后把结果展开为宽格式,使每个组各占一列。

  2. 然后我用padr::pad填充中间任何缺失的时间行,并将所有 NA 替换为零。

  3. 最后,将该数据框转换为 xts 对象并传入 dygraph;dygraph 似乎能够自动处理多列。


---
title: "test"
# `theme` is an option of the flexdashboard output format, so it has to be
# nested under that format rather than directly under `title`.
output:
  flexdashboard::flex_dashboard:
    theme: bootstrap
runtime: shiny
---

```{r setup, include=FALSE}
# Attach the packages whose functions are called unqualified in the plot chunk.
library(flexdashboard)
library(dplyr)     # %>%, mutate, group_by, count, ungroup
library(tidyr)     # spread
library(dygraphs)  # dygraph, dyRangeSelector, dyOptions
```

```{r global, include=FALSE}
# generate data
# One candidate row per day. The other columns are sampled with length 181, so
# the date sequence must also have 181 elements, i.e. 2018-01-01 through
# 2018-06-30 in daily steps.
dat <- data.frame(date = seq(as.Date("2018-01-01"),
                             as.Date("2018-06-30"),
                             by = "day"),
                  sex = sample(c("male", "female"), 181, replace = TRUE),
                  lang = sample(c("english", "spanish"), 181, replace = TRUE),
                  age = sample(20:35, 181, replace = TRUE))
# Keep a random 80 rows so that some days/weeks have no records.
dat <- dplyr::sample_n(dat, 80)
```

Sidebar {.sidebar}


# Sidebar control; each choice's value is the name of the column to group by
# and is read later as input$diss.
radioButtons(
  inputId = "diss",
  label = "Disaggregation",
  choices = list(
    "All" = "Total",
    "By Sex" = "sex",
    "By Language" = "lang"
  ),
  selected = "Total"
)

Page 1

```{r plot}
# Draw weekly record counts, split by whichever column the sidebar selects.
# Reading input$diss requires a reactive context, so everything is built inside
# renderDygraph(); the chart is redrawn whenever the selection changes.
renderDygraph({
  grp_col <- rlang::sym(input$diss) # This converts the input selection to a symbol

  weekly <- dat %>%
    # This is a hack to let us "group" by Total -- all one group. Using the
    # label as the value makes the resulting series column "Total", not "1".
    mutate(Total = "Total") %>%

    # Here's where we unquote the symbol so that dplyr can use it
    #   to refer to a column. In this case I make a dummy column
    #   that's a copy of whatever column we want to group
    mutate(my_group = !!grp_col) %>%

    # Now we make a group for every existing combination of week
    #   (using lubridate::floor_date) and level of our grouping column,
    #   count how many rows in each group, and spread that to wide format.
    group_by(date = lubridate::floor_date(date, "1 week"), my_group) %>%
    count() %>%
    spread(my_group, n) %>%
    ungroup() %>%

    # padr::pad() fills in any missing weeks in the sequence with new rows.
    #   Then we replace all the NA's with zeroes.
    padr::pad() %>%
    replace(is.na(.), 0)

  # Finally we can convert to xts and feed the wide table into dygraph.
  #   The dates become the index only: xts stores a single-typed matrix, so
  #   keeping the Date column as data would coerce the counts to character.
  xts::xts(select(weekly, -date), order.by = weekly$date) %>%
    dygraph() %>%
    dyRangeSelector() %>%
    dyOptions(
      useDataTimezone = FALSE, stepPlot = TRUE,
      drawGrid = FALSE, fillGraph = TRUE
    )
})
```

