我正在尝试从 Cloudflare 网站上抓取数据,但无论我怎么做,都会收到 403 Forbidden(禁止访问)错误。
我了解到这是由无头(headless)浏览器发出的请求导致的。有什么办法可以绕过这个问题吗?
我把当前的设置贴在下面。我还能做些什么来避免被检测到吗?
const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
puppeteer.use(StealthPlugin());
/**
 * Script entry point.
 *
 * Launches Chromium through puppeteer, opens a page, logs the HTTP status of
 * every response the page receives, navigates to the URL given as the first
 * command-line argument (process.argv[2]) and then closes the browser.
 *
 * Any error (including a missing URL argument) is logged with console.log;
 * the browser is closed in all cases once it has been launched.
 */
(async () => {
  const args = [
    "--no-sandbox",
    "--disable-setuid-sandbox",
    "--disable-accelerated-2d-canvas",
    "--no-zygote",
    "--renderer-process-limit=1",
    "--no-first-run",
    "--ignore-certificate-errors",
    "--ignore-certificate-errors-spki-list",
    "--disable-dev-shm-usage",
    "--disable-infobars",
    "--lang=en-US,en",
    // Chromium expects "width,height"; the "1920x1080" form is not parsed.
    "--window-size=1920,1080",
    "--disable-extensions",
  ];
  const options = {
    args,
    headless: true,
    ignoreHTTPSErrors: true,
    userDataDir: "./tmp",
    executablePath: "/snap/bin/chromium",
  };

  // Declared outside the try so the finally clause can always release it.
  let browser;
  try {
    // Validate the argument before paying the cost of starting a browser.
    const targetUrl = process.argv[2];
    if (!targetUrl) {
      throw new Error("Missing target URL. Usage: node <script> <url>");
    }

    browser = await puppeteer.launch(options);
    const page = await browser.newPage();
    page.on("response", (response) => {
      console.log(response.status());
    });
    await page.setExtraHTTPHeaders({
      // Quality values use a decimal point; a comma would start a new list item.
      "Accept-Language": "en,en-US;q=0.5",
      // The catch-all media range is "*/*", not "/".
      Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    });
    // page.waitFor() is no longer available in recent Puppeteer releases;
    // a plain timer gives the same 5 second pause on every version.
    await new Promise((resolve) => setTimeout(resolve, 5000));
    await page.goto(targetUrl);
  } catch (err) {
    console.log(err);
  } finally {
    // Close the browser even when navigation fails, otherwise the Chromium
    // child process is left running.
    if (browser) {
      try {
        await browser.close();
      } catch (closeErr) {
        console.log(closeErr);
      }
    }
  }
})();