我四处查找了一番,发现了这个问题。您的需求在此基础上更进了一步,要求按组计算 ecdf 值,因此我们可以使用 dplyr 中的 `do` 函数(这里有一个例子)来实现。
将此表与 ggplot 图中的值进行比较时,数值存在一些细微的差异,我不太确定这是为什么。可能只是因为 mtcars 数据集比较小,所以如果您在更大的数据集上运行,我预计结果会更接近实际值。
# Order the cars from lowest to highest mpg, then derive the plot variables
mtcars <- mtcars[order(mtcars$mpg), ] %>%
  mutate(
    # Arbitrary ranking variable: equal mpg values share a rank, no gaps
    Rank = dense_rank(mpg),
    # Rank expressed as a fraction of the highest rank ("percent picked")
    Percent_Picked = Rank / max(Rank),
    # Make cyl categorical: a three-level factor labelled 4, 6 and 8
    cyl = cut(cyl, breaks = c(3, 5, 7, 9), labels = c(4, 6, 8), right = FALSE)
  )

# ECDF of Percent_Picked, one curve per cyl level, both axes shown as percent
ggplot(data = mtcars, mapping = aes(x = Percent_Picked, colour = cyl)) +
  stat_ecdf(size = 1) +
  scale_x_continuous(labels = scales::percent) +
  scale_y_continuous(labels = scales::percent)
#' Tabulate the empirical CDF of a numeric vector
#'
#' Evaluates `stats::ecdf(vec)` at every distinct non-missing value of `vec`
#' and at the `anchors`, so the table can be read directly or drawn as a
#' step function.
#'
#' @param vec Numeric vector with at least one non-missing value.
#' @param anchors Extra x positions to include in the table. The default,
#'   `c(0, 1)`, brackets data that lie in (0, 1].
#' @return A data frame sorted by `x` with plain numeric columns `x`
#'   (evaluation points) and `y` (proportion of non-missing `vec` values
#'   that are `<= x`).
create_ecdf_vals <- function(vec, anchors = c(0, 1)) {
  stopifnot(is.numeric(vec), is.numeric(anchors))
  if (all(is.na(vec))) {
    stop("`vec` must contain at least one non-missing value", call. = FALSE)
  }

  # sort() drops NA and orders the grid; unique() removes an anchor that
  # coincides with an observed value.
  x <- sort(unique(c(anchors, vec)))

  # Use the ECDF as-is. Min-max rescaling the cumulative counts would force
  # the smallest observation to 0 instead of its true cumulative proportion,
  # and would divide by zero when `vec` has a single distinct value.
  data.frame(x = x, y = stats::ecdf(vec)(x))
}
# Per-cylinder ECDF table: one (x, y) row per evaluation point. do() keeps
# the result grouped by cyl, which the summary below relies on.
mt.ecdf <- mtcars %>%
  group_by(cyl) %>%
  do(create_ecdf_vals(.$Percent_Picked))

# ECDF value at the 25%, 50% and 75% marks of Percent_Picked for each group.
# An ECDF never decreases, so its value at the largest x <= q is the largest
# y among those rows. Taking max() of the filtered y avoids applying a
# position found in the filtered x to the unfiltered y, which is only right
# when the qualifying rows happen to come first.
mt.ecdf %>%
  summarise(
    q25 = max(y[x <= 0.25]),
    q50 = max(y[x <= 0.5]),
    q75 = max(y[x <= 0.75])
  )

# Step plot of the tabulated ECDFs, one line per cylinder group
ggplot(mt.ecdf, aes(x, y, color = cyl)) +
  geom_step()
~EDIT~
在 ggplot2 文档中经过一番挖掘后,我发现实际上可以使用 `layer_data` 函数显式地从图中提取数据。
# Build the ECDF plot but keep it in a variable so its data can be inspected
my.plt <- ggplot(mtcars, aes(Percent_Picked, color = cyl)) +
  stat_ecdf(size = 1) +
  scale_x_continuous(labels = scales::percent) +
  scale_y_continuous(labels = scales::percent)

# layer_data() returns the data frame ggplot2 computed for the plot's first
# layer, i.e. the x/y points that stat_ecdf() actually draws.
plt.data <- layer_data(my.plt)

# ECDF value at the 25%, 50% and 75% marks for each plotted group.
# `group` is ggplot2's group index for the layer (presumably one per cyl
# level here -- confirm against the plot legend).
# An ECDF never decreases, so its value at the largest x <= q is the largest
# y among those rows. Taking max() of the filtered y avoids applying a
# position found in the filtered x to the unfiltered y, which is only right
# when the qualifying rows happen to come first.
plt.data %>%
  group_by(group) %>%
  summarise(
    q25 = max(y[x <= 0.25]),
    q50 = max(y[x <= 0.5]),
    q75 = max(y[x <= 0.75])
  )