我正在使用 ggstatsplot 来获取统计分析的可视化表示。
我有很多数据集,它们的构成都非常相似。有些工作正常,而另一些则不然。 data1 是一个工作示例,而 data2 不起作用。
data1 <- structure(list(
treatment = structure(c(1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L),
.Label = c("negative_ctrl", "positive_ctrl", "treatmentA", "treatmentB", "treatmentC", "treatmentD"), class = "factor"),
value = c(1.74501, 2.04001, 1.89501, 1.84001,
1.89501, 9.75001, 8.50001, 8.80001, 11.50001, 10.25001, 7.90001,
9.25001, 11.45001, 7.75001, 7.75001, 7.55001, 8.70001, 8.20001,
6.95001, 6.60001, 7.40001, 7.15001, 8.25001, 9.20001, 8.95001,
6.45001, 6.05001, 5.40001, 7.95001, 6.80001, 4.65001, 6.40001,
6.40001, 6.70001, 5.40001, 3.20001, 2.70001, 4.30001, 4.10001,
3.60001, 4.00001, 3.00001, 4.70001, 3.10001, 3.50001, 6.45001,
5.45001, 4.90001, 7.25001, 4.55001, 4.70001, 6.25001, 5.65001,
6.00001, 5.10001)),
row.names = c(NA, -55L), class = c("tbl_df", "tbl", "data.frame"))
data2 <- structure(list(
treatment = structure(c(1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L),
.Label = c("negative_ctrl", "positive_ctrl", "treatmentA", "treatmentB", "treatmentC", "treatmentD"), class = "factor"),
value = c(1.00001, 1.00001, 1.00001, 1.00001, 1.00001, 6.77501,
5.68751, 5.99201, 8.24501, 7.01251, 4.79501, 5.99126, 8.26276,
5.35376, 5.38751, 4.60251, 5.38901, 4.85201, 4.44401, 5.20501,
6.20701, 5.77001, 4.05201, 3.65126, 3.02401, 4.68351, 3.90001,
2.56951, 3.70001, 3.61901, 3.96401, 2.93601, 1.53901, 1.40801,
2.05601, 2.08501, 1.89701, 1.79501, 1.50001, 2.09151, 1.53551,
1.57501, 3.88851, 3.09151, 2.75501, 4.40626, 2.42001, 2.60951,
3.83501, 3.37151, 3.70001, 2.92701)),
row.names = c(NA, -52L), class = c("tbl_df", "tbl", "data.frame"))
我将这两个数据集的最基本分析称为:
library(Rmpfr)
library(ggstatsplot)
ggstatsplot::ggbetweenstats(
data = data1,
x = treatment,
y = value,
messages = FALSE )
ggstatsplot::ggbetweenstats(
data = data2,
x = treatment,
y = value,
messages = FALSE )
对于 data1 我得到这个:
对于 data2 我得到:
> Error in stats::optim(par = 1.1 * rep(lambda, 2), fn = function(x) { : non-finite value supplied by optim
起初,我认为问题可能是我在阴性对照中传递的几个零,但我首先将它们稍微提高了一点,然后提高了 1,以确保值的范围不是问题。我能看到的唯一差异是,我在 data2 中只有 7 个而不是 10 个治疗 A(级别 3)测量值,但在 data1 中有 10 个测量值(由于样本失败,必须删除一些 NA)。然而,在这两种情况下,阴性对照(级别 1)只有 5 个值,我认为在这种类型的分析中,组之间的样本量不同不存在问题。