Predict.gbm() 2.1.4 与 2.1.3 的预测不一致

2023-12-21

这个问题和我之前的帖子有关here https://stackoverflow.com/questions/52702984/inconsistent-predictions-from-gbm-predict.

我已经找到了这个问题,它似乎与我使用的 GBM 版本有关。最新版本 2.1.4 在我的系统上出现了问题(R 3.4.4 和 3.5;都在 Ubuntu 18.04 上),而版本 2.1.3 按预期工作:

mydata <- structure(list(Count = c(1L, 3L, 1L, 4L, 1L, 0L, 1L, 2L, 0L, 0L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 0L, 2L, 3L, 1L, 4L, 3L, 0L, 4L, 1L, 2L, 1L, 1L, 0L, 2L, 1L, 4L, 1L, 5L, 3L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 1L, 1L, 0L, 3L, 1L, 1L, 0L, 3L, 1L, 1L, 1L, 1L, 2L, 3L, 2L, 2L, 0L, 0L, 3L, 5L, 1L, 2L, 1L, 1L, 0L, 0L, 1L, 2L, 1L, 3L, 1L, 1L, 0L, 2L, 2L, 1L, 3L, 3L, 2L, 0L, 0L, 1L, 2L, 1L, 0L, 2L, 0L, 0L, 4L, 4L, 2L), Treat1 = structure(c(10L, 14L, 8L, 2L, 3L, 12L, 1L, 10L, 6L, 2L, 11L, 11L, 15L, 1L, 8L, 3L, 13L, 9L, 9L, 11L, 1L, 8L, 14L, 5L, 10L, 8L, 15L, 11L, 7L, 6L, 13L, 11L, 7L, 1L, 1L, 2L, 7L, 12L, 5L, 1L, 8L, 1L, 9L, 8L,12L, 14L, 12L, 7L, 8L, 14L, 3L, 3L, 5L, 1L, 1L, 11L, 6L, 5L, 5L, 13L, 9L, 3L, 8L, 9L, 13L, 9L, 7L, 9L, 2L, 6L, 10L, 3L, 11L, 4L, 3L, 15L, 12L, 6L, 4L, 3L, 8L, 8L, 11L, 1L, 11L, 2L, 11L, 5L, 12L, 6L, 8L, 14L, 1L, 9L, 9L, 10L, 10L, 5L, 14L, 3L), .Label = c("D", "U", "R", "E", "C", "Y", "L", "O", "G", "T", "N", "J", "V", "X", "A"), class = "factor"), Treat2 = structure(c(15L, 13L, 7L, 8L, 2L, 5L, 15L, 4L, 2L, 7L, 6L, 2L, 3L, 14L, 10L, 7L, 7L, 14L, 11L, 7L, 6L, 1L, 5L, 13L, 11L, 6L, 10L, 5L, 3L, 1L, 7L, 9L, 6L, 10L, 5L, 11L, 15L, 9L, 7L, 11L, 10L, 2L, 3L, 3L, 5L, 11L, 8L, 6L,4L, 5L, 15L, 8L, 8L, 2L, 2L, 10L, 4L, 1L, 10L, 11L, 10L, 8L, 7L, 7L, 8L, 14L, 16L, 11L, 10L, 9L, 3L, 15L, 13L, 1L, 11L, 11L, 9L, 7L, 10L, 9L, 3L, 7L, 5L, 13L, 3L, 14L, 10L, 10L, 15L, 13L, 15L, 12L, 14L, 11L, 5L, 4L, 2L, 3L, 11L, 10L), .Label = c("B", "X", "R", "H", "L", "D", "U", "Q", "K", "C", "T", "V", "J", "E", "F", "A"), class = "factor"), Near = c(0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0), Co1 = c(2, 5, 1, 1, 0, 1, 1, 2, 1, 2, 5, 2, 1, 0, 1, 2, 6, 3, 3, 1, 2, 2, 3, 0, 1, 0, 1, 0, 2, 1, 0, 1, 2, 3, 1, 2, 2, 0, 0, 2, 3, 3, 1, 1, NA, 2, 0, 2, 1, NA, 1, 1, 0, 1, 2, 0, 2, 1, 1, 1, 2, 3, 1, 0, 4, 0, 0, 0, 2, 2, 1, 1,2, 0, 1, 2, 1, 0, 0, 0, 0, 2, 1, 2, 2, 2, 2, 1, 0, 1, 1, 1, 1, 1, 0, 2, 0, 0, 5, 1), Co2 = c(1, 1, 2, 2, 4, 1, 3, 0, 5, 2, 2, 4, 1, 1, 2, 1, 2, 3, 0, 2, 3, 3, 0, 3, 1, 0, 1, 1, 1, 2, 0, 1, 1, 1, 2, 3, 2, 2, 3, 0, 0, 0, 1, 2, NA, 1, 1, 1, 0, 2, 1, 1, 2, 5, 0, 2, 1, 4, 1, 1, 3, 0, 1, 1, 1, 1, NA, 0, 2, 1, 1, 3, 2, 1, 2, 1, 3, 1, 2, 0, 1, 5, 2, 2, 1, 2, 3, 4, 3, 1, 1, 0, 5, 1, 1, 0, 1, 1, 2, 0)), .Names = c("Count", "Treat1", "Treat2", "Near", "Co1", "Co2"), row.names = c(1759L, 959L, 1265L, 1504L, 630L, 1905L, 1885L, 1140L, 1187L, 1792L, 1258L, 1125L, 756L, 778L, 1718L, 1797L, 388L, 715L, 63L, 311L, 1492L, 1128L, 629L, 536L, 503L, 651L, 1684L, 1893L, 721L, 1440L, 1872L, 1444L, 1593L, 143L, 1278L, 1558L, 1851L, 1168L, 1829L, 386L, 365L, 849L, 429L, 155L, 11L, 1644L, 101L, 985L, 72L, 459L, 1716L, 844L, 1313L, 77L, 1870L, 744L, 219L, 513L, 644L, 831L, 338L, 284L, 211L, 1096L,243L, 1717L, 1881L, 1784L, 1017L, 992L, 45L, 707L, 489L, 1267L, 1152L, 1819L, 995L, 510L, 1350L, 1700L, 56L, 1754L, 725L, 1625L, 319L, 1818L, 1287L, 1634L, 953L, 1351L, 1787L, 923L, 917L, 484L, 886L, 390L, 1531L, 679L, 1811L, 1736L), class = "data.frame")

detach("package:gbm", unload = TRUE )
remove.packages("gbm")
require(devtools)

install_version("gbm", version = "2.1.3")

set.seed(12345)
require(gbm)

n.trees <- 10000

m1.gbm <- gbm(Count ~ Treat1 + Treat2 + Near + Co1 + Co2, data = mydata, distribution = "poisson", n.trees = n.trees)

head(predict(m1.gbm, newdata = mydata, n.trees = n.trees, type = "response"))
[1] 0.8620154 2.8210216 0.8800267 3.7808341 0.4749737 0.3716022

predict(m1.gbm, newdata = head(mydata), n.trees = n.trees, type = "response")
[1] 0.8620154 2.8210216 0.8800267 3.7808341 0.4749737 0.3716022

...正如预期的那样。然而,

detach("package:gbm", unload = TRUE )
remove.packages("gbm")

install.packages("gbm", dependencies = TRUE)
# I had to restart R after this, otherwise the following line failed with:
# Loading required package: gbm
# Error: package or namespace load failed for ‘gbm’ in get(method, envir = home):
# lazy-load database '/home/container/R/x86_64-pc-linux-gnu-library/3.5/gbm/R/gbm.rdb' is corrupt 
require(gbm)

m1.gbm <- gbm(Count ~ Treat1 + Treat2 + Near + Co1 + Co2, data = mydata, distribution = "poisson", n.trees = n.trees)

head(predict(m1.gbm, newdata = mydata, n.trees = n.trees, type = "response"))
[1] 0.7524109 2.8789957 0.7843470 4.1724821 0.4525449 0.2036923

predict(m1.gbm, newdata = head(mydata), n.trees = n.trees, type = "response")
[1] 2.2216079 1.2806235 0.9109426 2.2842149 2.4828922 0.6124778

...这在我之前的帖子中展示了这个问题。

我觉得这很令人惊讶,因为gbm是一个众所周知的软件包,虽然我看到小插图是上个月更新的,所以也许最新版本是最近才发布的。我无法找到确切的日期here https://cran.r-project.org/src/contrib/Archive/gbm/。继续这里的最佳方法是什么?


None

本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

Predict.gbm() 2.1.4 与 2.1.3 的预测不一致 的相关文章

随机推荐