具有可变宽度的重叠条形图/直方图

2024-05-05

我有如下数据:

chr  totgenes  FST>0.4  %FST>0.4  exFST>0.4  %exFST>0.4  inFST>0.4  %inFST>0.4  chrtotlen
1    1457      49       3.36307   73         5.0103      54         3.70625     114375790
1A   1153      49       4.24978   72         6.24458     48         4.1630      70879221
2    1765      80       4.53258   132        7.47875     96         5.43909     151896526
3    1495      33       2.20736   56         3.74582     35         2.34114     111449612
4    953       58       6.08604   89         9.33893     56         5.87618     71343966
4A   408       9        2.20588   17         4.16667     11         2.69608     19376786
5    1171      52       4.44065   81         6.91716     44         3.75747     61898265
6    626       48       7.66773   62         9.90415     47         7.50799     34836644
7    636       8        1.25786   24         3.77358     8          1.25786     38159610
8    636       24       3.77358   28         4.40252     27         4.24528     30964699
9    523       18       3.44168   23         4.39771     21         4.0153      25566760

我想绘制条形图,其中 y 是 FST>0.4、exFST>0.4、inFST>0.4 三列的值,x 是 chr 列,条形的宽度由 chrtotlen 决定。

我正在尝试使用

# Question's ggplot2 attempt at bars whose width follows chromosome length.
# Assumes ggplot2 and a package providing melt() (e.g. reshape2) are already
# attached; no library() call is part of this snippet.
data <- read.table("realBFWBM_noNAs.fst.totgenesChrcp", sep = "\t", header = TRUE)
# read.table() rewrites the header "FST>0.4" to the syntactic name "FST.0.4".
myVector <- c("chr", "FST.0.4", "exFST.0.4", "inFST.0.4", "chrtotlen")
# Long format: one row per (chr, variable); chr and chrtotlen stay as id columns.
# (The closing parenthesis of melt() was missing, which made the snippet unparsable.)
melted <- melt(data[, myVector], id = c("chr", "chrtotlen"))
# NOTE(review): `width` is mapped to the raw chrtotlen (tens of millions), while
# the discrete x positions are only one unit apart, so the bars overlap and
# stacking fails. This is kept as-is because it is the behaviour being asked about.
ggplot(melted, aes(x = as.factor(chr), y = value, width = chrtotlen)) +
  geom_bar(aes(fill = variable), stat = "identity") +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    legend.title = element_blank(),
    legend.position = c(0.8, 0.8),
    axis.title.x = element_text(size = 20),
    text = element_text(size = 20),
    axis.text.x = element_text(size = 20),
    panel.background = element_blank(),
    axis.text.y = element_text(size = 20)
  )

但我得到的是一张条形相互重叠的图(原文此处附有图片)。

我也收到错误"position_stack requires non-overlapping x intervals"

在基础 R 方面取得了一些进展,但仍有工作要做,因为轴的行为未达到预期。

# Base R version: overlay the three count series as shaded bars whose widths
# are the chromosome lengths, then draw both axes by hand.
data <- read.table("realBFWBM_noNAs.fst.totgenesChrcp", sep = "\t", header = TRUE)
myVector <- c("chr", "FST.0.4", "exFST.0.4", "inFST.0.4", "chrtotlen")
counts <- data[, myVector]

# Remember the previous graphics settings so they can be restored afterwards.
old_par <- par(xpd = TRUE, mar = c(4, 4, 2, 2))

# Columns 2:4 of `counts` are the three series. Every call uses the same widths,
# so the bar midpoints returned by barplot() are identical for each series.
bar_mids <- NULL
for (x in 2:4) {
  bar_mids <- barplot(
    counts[, x],
    width = counts$chrtotlen,
    axes = FALSE,
    axisnames = FALSE,
    # A different shading density/angle per series keeps the overlays apart.
    density = x + 5,
    angle = x^5,
    space = 0,
    ylim = c(0, 150),
    # The first series opens the plot; the others are drawn on top of it.
    add = x > 2
  )
}

# seq(0, 100, 150) produced the single value 0; step through the full y range.
y_ticks <- seq(0, 150, by = 50)
axis(2, at = y_ticks, labels = y_ticks)
# Label each bar at its midpoint with the chromosome name, reusing the midpoints
# from above instead of calling barplot() again on the whole data frame.
axis(1, at = bar_mids, labels = counts$chr)

par(old_par)

这并不十分容易,但一个相当直接的解决方法是用 geom_rect 手动构建图形。

我厚着脸皮借用了下面两个帖子的思路;本问题与它们几乎重复:

  • 如何使ggplot2中的可变条形宽度不重叠或间隙 https://stackoverflow.com/questions/20688376/how-to-make-variable-bar-widths-in-ggplot2-not-overlap-or-gap and
  • ggplot 中不同宽度的堆积条形图 https://stackoverflow.com/questions/51136471/stacked-bar-chart-with-varying-widths-in-ggplot

轴的问题是通过用连续轴模拟离散轴来解决的:先在连续轴上设置刻度位置(breaks),再把离散的染色体标签赋给这些刻度。

library(tidyverse)

# Per-chromosome counts from the question. read.table() makes the headers
# syntactic, so "FST>0.4" becomes "FST.0.4" and "%FST>0.4" becomes "X.FST.0.4".
df <- read.table(header = TRUE, text = "    chr  totgenes  FST>0.4  %FST>0.4  exFST>0.4  %exFST>0.4  inFST>0.4  %inFST>0.4  chrtotlen
    1    1457      49       3.36307   73         5.0103      54         3.70625     114375790
    1A   1153      49       4.24978   72         6.24458     48         4.1630      70879221
    2    1765      80       4.53258   132        7.47875     96         5.43909     151896526
    3    1495      33       2.20736   56         3.74582     35         2.34114     111449612
    4    953       58       6.08604   89         9.33893     56         5.87618     71343966
    4A   408       9        2.20588   17         4.16667     11         2.69608     19376786
    5    1171      52       4.44065   81         6.91716     44         3.75747     61898265
    6    626       48       7.66773   62         9.90415     47         7.50799     34836644
    7    636       8        1.25786   24         3.77358     8          1.25786     38159610
    8    636       24       3.77358   28         4.40252     27         4.24528     30964699
    9    523       18       3.44168   23         4.39771     21         4.0153      25566760")

# Rescale the widths and lay the chromosomes side by side: each bar runs from
# the running total of the widths before it (xmin) to the running total
# including it (xmax), so its drawn width is exactly rel_len and bars neither
# overlap nor leave gaps. The edges are computed per row of `df`, before
# reshaping, so chromosomes with equal lengths are still handled correctly.
newdf <- df %>%
  mutate(
    rel_len = chrtotlen / max(chrtotlen),
    xmax = cumsum(rel_len),
    xmin = xmax - rel_len
  ) %>%
  # Only the three count columns match; the "X."-prefixed percentage columns
  # and the helper columns added above do not.
  pivot_longer(
    cols = matches("^ex|^in|^FST"),
    names_to = "key",
    values_to = "value"
  ) %>%
  # Stack the three series within each chromosome.
  group_by(chr) %>%
  mutate(ymax = cumsum(value), ymin = lag(ymax, default = 0)) %>%
  ungroup()

# Fake a discrete axis on the continuous scale: one break at the centre of
# every bar, labelled with the chromosome name.
w <- df$chrtotlen / max(df$chrtotlen)
xlab <- df$chr
pos <- cumsum(w) - w / 2

# Finally, the plot
ggplot(newdf) +
  geom_rect(aes(
    xmin = xmin, xmax = xmax,
    ymin = ymin, ymax = ymax, fill = key
  )) +
  scale_x_continuous(labels = xlab, breaks = pos)

Created on 2021-02-14 by the reprex package https://reprex.tidyverse.org (v1.0.0)

本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

具有可变宽度的重叠条形图/直方图 的相关文章

随机推荐