查找每个字符(A、B、C、D)在包含 3 个数据集的列表的每一列中出现的次数比例

2024-03-28

我有一个包含 4 个数据集的列表(见下面的 dput() 输出)。我还有一个名为“u”的变量,其中包含 4 个字符。我制作了一个视频(https://youtu.be/PMcBt-HT5Cs)来解释我想要的结果,另外还有一个电子表格(https://docs.google.com/spreadsheets/d/1Tp7CIQiaN6pRFJycSMY5HdpQXiwhjtlKJ4ne4c4a5ag/edit#gid=0)可供参考。

电子表格与我的数据并不完全一样,但我只是将其用作示例。我的原始列表有 4 个数据集,但电子表格有 3 个数据集。

本质上,我有一些字符(A,B,C,D),我想计算每个字符在各个数据集的每一列中出现次数所占的比例。(请看视频,用文字很难解释清楚)

# Answer categories, in the order used for factor levels and output columns.
u <- c("D", "B", "C", "A")

l<- list(`0` = structure(list(X70 = "D", X71 = "C", X72 = "C", X73 = "A", X74 = "B", X75 = "C", X76 = "D", X77 = NA_character_, X78 = "B", X79 = "D", X80 = "C", Q = 1), row.names = 32L, class = "data.frame"), `1` = structure(list(X70 = c("D", "B", "D", "D", "B", "D", "D", "D", "D", "D", "D"), X71 = c("B", "B", "C", "C", "C", NA, "D", "B", "C", "A", "C"), X72 = c("A", "A", "C", "B", "C", "C", "C", "C", "D", "B", NA), X73 = c("B", "C", "C", "B", "C", "D", "A", "B", "C", "C", NA), X74 = c("B", "A", "C", "D", "B", "D", NA, "D", "D", "D", NA), X75 = c("C", "C", "B", "C", "D", "D", "C", "A", "C", "C", "C"), X76 = c("D", "A", "D", "B", "D", "C", "D", "A", "A", "D", "B"), X77 = c("D", "C", "B", "B", "B", "C", "B", "B", "B", "B", "D"), X78 = c("B", "C", "C", "B", "A", "A", "C", "B", "A", "C", NA), X79 = c("C", "C", NA, NA, "D", "A", "A", "A", "D", "A", "D"), X80 = c("B", "A", NA, NA, "B", "C", "B", NA, "B", "C", "A"), Q = c(2, 2, 1, 1, 2, 2, 1, 1, 4, 3, 1)), row.names = c(8L, 10L, 12L, 17L, 25L, 27L, 28L, 33L, 35L, 38L, 45L), class = "data.frame"), `2` = structure(list(X70 = c("D", "D", "D", "B", "D", "C", "D", "D", "D", "D", "D", "D"), X71 = c("A", "B", "C", "C", "A", "A", "C", "B", "C", "C", "D", "B"), X72 = c("D", "C", "D", "A", "A", "C", "D", "C", NA, "D", "C", "B"), X73 = c("B", "D", "D", "C", "B", "D", "D", "D", NA, NA, "C", "A"), X74 = c("D", "C", "B", "D", "C", "B", "C", "C", "B", NA, "C", "D"), X75 = c("B", "C", "C", "C", NA, "C", "B", "C", "C", "C", "B", "C"), X76 = c("A", "D", "D", "D", NA, "D", "D", "A", "D", "D", "D", "D"), X77 = c("B", "B", "D", "B", NA, "B", "D", "B", "B", "B", "B", "B"), X78 = c("C", "D", "C", "B", NA, "D", "C", "C", "B", "D", "C", NA), X79 = c("A", "D", "D", "D", NA, "D", "A", NA, "A", "D", "B", NA), X80 = c(NA, "C", "C", "A", NA, "C", "C", NA, "B", "C", "C", NA), Q = c(2, 3, 3, 1, 3, 1, 2, 2, 1, 2, 2, 1)), row.names = c(4L, 5L, 6L, 11L, 15L, 16L, 21L, 22L, 26L, 37L, 39L, 43L), class = "data.frame"), `3` = structure(list(X70 = c("A", "A", 
"D", "C", "D", "D", "D", "D", NA, "D", "D", "D"), X71 = c("B", "C", "D", "D", "C", "C", "B", "C", "C", "C", "A", "D"), X72 = c("B", "C", NA, "B", "A", "C", "B", "A", "C", "C", "D", "B"), X73 = c(NA, "C", "C", "A", "D", "C", "A", "A", "D", "B", "D", "B"), X74 = c(NA, "C", "D", "B", "A", "D", NA, "D", "B", "A", "D", "A"), X75 = c(NA, "C", "B", "D", "C", "C", "C", "C", "C", "B", "C", "D"), X76 = c(NA, "D", "A", "B", "A", "D", "D", "D", "D", "D", "D", "D"), X77 = c(NA, "B", "B", "B", "C", "B", "A", "B", NA, "C", "D", "D"), X78 = c(NA, "C", "C", "B", "C", "B", "A", "C", "D", "C", "C", "C"), X79 = c(NA, "D", "D", NA, "B", "D", "A", "D", "A", "D", "D", "A"), X80 = c(NA, "C", "C", NA, "D", "C", "C", "C", "C", "C", "B", "C"), Q = c(2, 2, 2, 2, 4, 2, 4, 4, 4, 3, 3, 2)), row.names = c(2L, 13L, 14L, 18L, 19L, 20L, 29L, 30L, 34L, 36L, 41L, 44L), class = "data.frame"), `4` = structure(list(X70 = c("D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D"), X71 = c("A", NA, "A", "B", "C", "A", "A", "C", "B", "C", "C", "C"), X72 = c("B", "C", "C", "C", NA, "C", "B", "A", "C", "B", NA, "A"), X73 = c(NA, "D", "D", "D", "B", "D", "D", "D", "C", "A", "A", "C"), X74 = c("C", "A", "C", "D", "C", "C", "A", "A", "C", "D", "D", "D"), X75 = c("C", "C", "C", "C", "C", "C", "C", "C", "C", "D", "C", "C"), X76 = c("D", "D", "D", "D", "D", "D", "D", "D", "A", "D", "D", "A"), X77 = c(NA, "B", "D", "B", NA, "B", "B", "B", "C", "D", NA, "C"), X78 = c("C", "C", "C", "C", "A", "A", "C", "A", "C", "C", "C", "C"), X79 = c("D", "D", "A", "D", "D", "A", "D", "D", "A", "D", "C", "C"), X80 = c("C", "C", "C", "C", NA, "C", "C", "C", "C", "C", "C", "A"), Q = c(2, 4, 4, 3, 2, 4, 2, 4, 1, 1, 2, 4)), row.names = c(1L, 3L, 7L, 9L, 23L, 24L, 31L, 40L, 42L, 46L, 47L, 48L), class = "data.frame"))

我仔细阅读了 matplot 文档,但有一些我想添加的功能,不知道如何用 matplot 实现。不过很久以前有人帮我用 ggplot 创建了一个漂亮的图表:每条线的末尾都标有 A、B、C、D,看起来非常美观!这正是我想要创建的图表。有没有办法为 out 中的每个表重新创建这个图表?我在下面贴出了代码,它只能为一张表创建图表,但我想像之前用 matplot 那样,为每个表各创建一个图表。

library(tidyverse)

# Proportion of each answer (rows A-D) within each group (columns "1"-"4");
# values are filled column by column, so every column sums to 1.
d <- matrix(
  c(
    0.129310344827586, 0.258620689655172, 0.318965517241379, 0.293103448275862,
    0.12972972972973, 0.210810810810811, 0.345945945945946, 0.313513513513514,
    0.0845070422535211, 0.154929577464789, 0.338028169014085, 0.422535211267606,
    0.226415094339623, 0.0943396226415094, 0.367924528301887, 0.311320754716981
  ),
  nrow = 4L,
  ncol = 4L,
  dimnames = list(c("A", "B", "C", "D"), c("1", "2", "3", "4"))
)

# Reshape the proportion matrix into long format: one row per
# (Groups, name) pair, with the proportion in `value`.
# The result is left grouped by `Groups`, so a later `filter(n == max(n))`
# is evaluated within each group.
d <- d %>%
  # data.frame() prefixes the numeric column names with "X" ("1" -> "X1").
  data.frame() %>%
  rownames_to_column(var = "Groups") %>%
  # Select by name instead of position so any number of columns works.
  pivot_longer(cols = -Groups) %>%
  group_by(Groups) %>%
  mutate(
    # Strip the "X" prefix to recover the numeric x position.
    name = as.numeric(sub("X", "", name)),
    # Running index within each group; its maximum marks the last point.
    n = row_number()
  )

# Line chart of the proportion of each answer across groups, with the answer
# letter printed next to the last point of every line instead of a legend.
ggplot(data = d, aes(x = name, y = value, color = factor(Groups))) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
  geom_path(aes(group = factor(Groups)), linewidth = 0.7) +
  geom_point(size = 2) +
  # Keep only the last point of each line as the anchor for its label.
  geom_text(
    data = d %>% filter(n == max(n)),
    aes(label = Groups),
    nudge_x = 0.2
  ) +
  labs(x = "Group", y = "P") +
  theme_bw() +
  theme(legend.position = "none")

我们可以用 lapply 遍历列表“l”,再用 sapply 遍历每个数据集的各列:先把列转换为以“u”为 levels 的 factor,用 table 计数,再用 proportions 得到比例;然后用 t 转置,转换为 data.frame(as.data.frame),并按行拆分(asplit,MARGIN = 1);接着用 purrr 的 transpose 调整结构,使各个列表元素中同名的列归为一组,最后用 bind_rows 把它们按行绑定在一起。

library(dplyr)
library(purrr)
# For every data set in `l`, compute the proportion of each level of `u` in
# every column, then regroup the results by column: `out` holds one tibble per
# column, with one row per data set (`grp`) and one column per level of `u`.
out <- l %>%
  lapply(function(dat) {
    # Levels x columns matrix of proportions (NA values are not counted).
    props <- sapply(dat, function(col) {
      counts <- table(factor(unlist(col), levels = u))
      proportions(counts)
    })
    # One named numeric vector per column of `dat`.
    asplit(as.data.frame(t(props)), 1)
  }) %>%
  # Turn the list inside out: from data set -> column to column -> data set.
  transpose() %>%
  map(bind_rows, .id = "grp")

-output

out
$X70
# A tibble: 5 x 5
  grp       D      B      C     A
  <chr> <dbl>  <dbl>  <dbl> <dbl>
1 0     1     0      0      0    
2 1     0.818 0.182  0      0    
3 2     0.833 0.0833 0.0833 0    
4 3     0.727 0      0.0909 0.182
5 4     1     0      0      0    

$X71
# A tibble: 5 x 5
  grp        D     B     C      A
  <chr>  <dbl> <dbl> <dbl>  <dbl>
1 0     0      0     1     0     
2 1     0.1    0.3   0.5   0.1   
3 2     0.0833 0.25  0.417 0.25  
4 3     0.25   0.167 0.5   0.0833
5 4     0      0.182 0.455 0.364 

$X72
# A tibble: 5 x 5
  grp        D      B     C     A
  <chr>  <dbl>  <dbl> <dbl> <dbl>
1 0     0      0      1     0    
2 1     0.1    0.2    0.5   0.2  
3 2     0.364  0.0909 0.364 0.182
4 3     0.0909 0.364  0.364 0.182
5 4     0      0.3    0.5   0.2  

$X73
# A tibble: 5 x 5
  grp       D      B     C     A
  <chr> <dbl>  <dbl> <dbl> <dbl>
1 0     0     0      0     1    
2 1     0.1   0.3    0.5   0.1  
3 2     0.5   0.2    0.2   0.1  
4 3     0.273 0.182  0.273 0.273
5 4     0.545 0.0909 0.182 0.182

$X74
# A tibble: 5 x 5
  grp       D     B     C     A
  <chr> <dbl> <dbl> <dbl> <dbl>
1 0     0     1     0     0    
2 1     0.556 0.222 0.111 0.111
3 2     0.273 0.273 0.455 0    
4 3     0.4   0.2   0.1   0.3  
5 4     0.333 0     0.417 0.25 

$X75
# A tibble: 5 x 5
  grp        D      B     C      A
  <chr>  <dbl>  <dbl> <dbl>  <dbl>
1 0     0      0      1     0     
2 1     0.182  0.0909 0.636 0.0909
3 2     0      0.273  0.727 0     
4 3     0.182  0.182  0.636 0     
5 4     0.0833 0      0.917 0     

$X76
# A tibble: 5 x 5
  grp       D      B      C     A
  <chr> <dbl>  <dbl>  <dbl> <dbl>
1 0     1     0      0      0    
2 1     0.455 0.182  0.0909 0.273
3 2     0.818 0      0      0.182
4 3     0.727 0.0909 0      0.182
5 4     0.833 0      0      0.167

$X77
# A tibble: 5 x 5
  grp         D       B       C     A
  <chr>   <dbl>   <dbl>   <dbl> <dbl>
1 0     NaN     NaN     NaN     NaN  
2 1       0.182   0.636   0.182   0  
3 2       0.182   0.818   0       0  
4 3       0.2     0.5     0.2     0.1
5 4       0.222   0.556   0.222   0  

$X78
# A tibble: 5 x 5
  grp        D     B     C      A
  <chr>  <dbl> <dbl> <dbl>  <dbl>
1 0     0      1     0     0     
2 1     0      0.3   0.4   0.3   
3 2     0.3    0.2   0.5   0     
4 3     0.0909 0.182 0.636 0.0909
5 4     0      0     0.75  0.25  

$X79
# A tibble: 5 x 5
  grp       D     B     C     A
  <chr> <dbl> <dbl> <dbl> <dbl>
1 0     1     0     0     0    
2 1     0.333 0     0.222 0.444
3 2     0.556 0.111 0     0.333
4 3     0.6   0.1   0     0.3  
5 4     0.583 0     0.167 0.25 

$X80
# A tibble: 5 x 5
  grp       D     B     C      A
  <chr> <dbl> <dbl> <dbl>  <dbl>
1 0       0   0     1     0     
2 1       0   0.5   0.25  0.25  
3 2       0   0.125 0.75  0.125 
4 3       0.1 0.1   0.8   0     
5 4       0   0     0.909 0.0909

$Q
# A tibble: 5 x 5
  grp       D     B     C     A
  <chr> <dbl> <dbl> <dbl> <dbl>
1 0       NaN   NaN   NaN   NaN
2 1       NaN   NaN   NaN   NaN
3 2       NaN   NaN   NaN   NaN
4 3       NaN   NaN   NaN   NaN
5 4       NaN   NaN   NaN   NaN

要绘制单个元素,请用 [[ 提取列表元素,去掉第一列“grp”([-1]),然后使用 matplot

# Plot the first column's proportions: one line per answer letter, with the
# data sets (`grp`) along the x axis.
matplot(out[[1]][-1], type = "l", col = 1:4, lty = 1:4, xaxt = "n",
        xlab = "grp", ylab = "proportion")
# matplot() draws against the row index, so the ticks must be labelled with
# the `grp` values (one per row), not with the answer letters.
axis(side = 1, at = seq_len(nrow(out[[1]])), labels = out[[1]][["grp"]])
# Use the same colours and line types as the plotted lines.
legend("topleft", legend = colnames(out[[1]][-1]), col = 1:4, lty = 1:4)

如果希望对所有元素都这样绘图,请用 lapply 循环

# Draw one panel per answer column on a 4 x 3 grid; the previous graphics
# settings are restored afterwards.
old_par <- par(mfrow = c(4, 3))
# Drop the numeric "Q" element by name (its proportions are all NaN) instead
# of relying on its position in the list.
panel_names <- setdiff(names(out), "Q")
out2 <- lapply(setNames(panel_names, panel_names), function(nm) {
  x <- out[[nm]]
  matplot(x[-1], type = "l", col = 1:4, lty = 1:4, xaxt = "n",
          xlab = "grp", ylab = "proportion", main = nm)
  # matplot() draws against the row index, so the ticks must be labelled with
  # the `grp` values (one per row), not with the answer letters.
  axis(side = 1, at = seq_len(nrow(x)), labels = x[["grp"]])
  # Use the same colours and line types as the plotted lines.
  legend("topleft", legend = colnames(x[-1]), col = 1:4, lty = 1:4)
})
par(old_par)

-output

本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

查找每个字符(A、B、C、D)在包含 3 个数据集的列表的每一列中出现的次数比例 的相关文章

随机推荐