waitForSelector 找不到相关部分

2024-02-07

我使用以下代码与 Puppeteer 来检查相关部分是否已加载,但它总是返回加载错误。这可能是什么原因?

缺少哪一部分我不明白。

我正在使用下面的代码:

/**
 * Visits one page per time frame with Puppeteer and collects the text of
 * `section.forecast-box-graph .title` from each of them.
 *
 * @param {string} lnk Base page URL; `?timeFrame=<seconds>` is appended for
 *   every entry of `timeFrames`.
 * @returns {Promise<string[]>} The collected title texts, followed by a label
 *   built from the last path segment of `lnk` with dashes replaced by spaces.
 */
async function getResults(lnk) {
  const results = [];
  // Each entry is multiplied by 60 below to build the `timeFrame` query value.
  const timeFrames = [1, 5, 15];

  const browser = await puppeteer.launch({
    headless: true,

    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--single-process",
      "--no-zygote",
      // "--remote-debugging-port"
    ],
    timeout: 0,

    // Use PUPPETEER_EXECUTABLE_PATH when NODE_ENV is "production", otherwise
    // whatever puppeteer.executablePath() reports.
    executablePath:
      process.env.NODE_ENV === "production"
        ? process.env.PUPPETEER_EXECUTABLE_PATH
        : puppeteer.executablePath(),
  });

  // A single page is created once here and reused by every loop iteration.
  const page = await browser.newPage();

  for (const i of timeFrames) {
    const url = `${lnk}?timeFrame=${i * 60}`;
    console.log(`Getting: ${url}`);

    await page.goto(url, {waitUntil: "networkidle0"});

    console.log("Process 1");
    // await page.waitForTimeout(10000); // wait for 10 seconds
    // await page.waitForNavigation({waitUntil: "domcontentloaded"});
    // await page.waitForTimeout(1000)

    await page
      .waitForSelector("section.forecast-box-graph")
      .then(async () => {
        // await page.waitForSelector('h1.main-title.js-main-title');
        console.log("Getting");

        const status = await page.$eval(
          "section.forecast-box-graph .title",
          el => el.textContent
        );
        // Read from the page but never used afterwards.
        const Bank_Name = await page.$eval(
          "h1.main-title.js-main-title",
          el => el.textContent.trim()
        );
        results.push(status);
        // NOTE(review): this closes the one shared page inside the loop, so
        // the next iteration calls page.goto() on an already closed page.
        await page.close();
      })
      // Every rejection in the chain above (selector wait, either $eval call,
      // page.close) ends up here and is reported only as "Loading error";
      // the underlying error is not logged.
      .catch(() => console.log("Loading error"));
  }
  await browser.close();

  // Last element: final path segment of `lnk` with "-" replaced by " ".
  results.push(lnk.split("/").pop().split("-").join(" "));

  return results;
}

这是相关部分(见截图),页面链接如下:https://in.investing.com/equities/axis-bank-technical?timeFrame=60


您在错误的位置关闭了页面,将所有内容放入 for 循环中(如下所示)并增加视口大小将解决您的问题。

const puppeteer = require("puppeteer");

(async () => {

    /**
     * Scrapes the forecast summary of a page for the 1, 5 and 15 minute time
     * frames, launching a fresh browser for every time frame.
     *
     * @param {string} lnk Base page URL; `?timeFrame=<seconds>` is appended.
     * @returns {Promise<Array<{bankName: string, status: string, lnk: string, url: string}>>}
     *   One entry per time frame.
     * @throws Rejects with the Puppeteer error if navigation, the selector
     *   wait or an element lookup fails; the browser is closed first.
     */
    async function getResults(lnk) {
        const results = [];
        const timeFrames = [1, 5, 15];
        // Last path segment with dashes turned into spaces; same for every time frame.
        const label = lnk.replace(/-/g, ' ').split('/').pop();

        for (const i of timeFrames) {
            const browser = await puppeteer.launch({headless: true});

            // The browser is scoped to this iteration, so it has to be closed
            // here: a try/finally guarantees that even when goto/waitForSelector
            // throws, no Chrome process is left behind.
            try {
                const page = await browser.newPage();
                await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36");
                await page.setViewport({width: 1920, height: 1080});

                const url = `${lnk}?timeFrame=${i * 60}`;
                console.log(url);

                await page.goto(url, {waitUntil: "networkidle2", timeout: 70000});

                await page.waitForSelector('section.forecast-box-graph');

                const status = await page.$eval("section.forecast-box-graph .title", el => el.textContent);
                const bankName = await page.$eval("h1.main-title.js-main-title", (el) => el.textContent.trim());

                results.push({
                    bankName,
                    status,
                    lnk: label,
                    url,
                });
            } finally {
                await browser.close();
            }
        }

        return results;
    }

    console.log(await getResults('https://in.investing.com/equities/axis-bank-technical'));

})().catch(err => console.error(err));

如何在 Render(https://render.com/)上运行(来源:https://youtu.be/6cm6G78ZDmM)

package.json - 添加以下依赖项

"dotenv": "^16.0.3",
"express": "^4.18.2",
"puppeteer": "^20.1.2"

index.js

const express = require("express");
const { scrape } = require('./scrape');
const app = express();

const PORT = process.env.PORT || 3000;

// GET /scrape?url=<page url>
// scrape() writes the response itself; this handler only validates the input
// and makes sure a rejected scrape does not become an unhandled rejection.
app.get("/scrape", (req, res, next) => {
    const { url } = req.query;

    // Without this check a missing parameter would be scraped as "undefined?timeFrame=...".
    if (typeof url !== "string" || url === "") {
        res.status(400).send({ error: "Missing required query parameter: url" });
        return;
    }

    // Express 4 does not observe promises returned from handlers, so forward
    // failures (e.g. the browser failing to launch) to the error middleware.
    scrape(url, res).catch(next);
});

// Health-check style endpoint used to confirm the service is up.
app.get("/", (req, res) => {
    res.send("test is running");
});

app.listen(PORT, () => {
    console.log(`Listening on port ${PORT}`);
});

Dockerfile

# Base image: the Puppeteer 20.1.2 image from the GitHub Container Registry.
FROM ghcr.io/puppeteer/puppeteer:20.1.2

# PUPPETEER_EXECUTABLE_PATH is read by scrape.js when NODE_ENV is "production".
# NOTE(review): presumably the base image ships Chrome at this path, which is
# why the Chromium download is skipped — verify against the image.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
    PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable

WORKDIR /usr/src/app

# Copy only the package manifests first and install dependencies from them,
# then copy the rest of the project.
COPY package*.json ./
RUN npm ci
COPY . .
# Start the Express server defined in index.js.
CMD [ "node", "index.js" ]

scrape.js - 稍微修改上面的代码

const puppeteer = require('puppeteer');
require("dotenv").config();

/**
 * Scrapes the forecast summary of a page for the 1, 5 and 15 minute time
 * frames and sends the collected results with `res.send`.
 *
 * A failure for one time frame does not abort the others: it is recorded as
 * `{url, lnk, error: {name, message}}` in the result list instead.
 *
 * @param {string} lnk Base page URL; `?timeFrame=<seconds>` is appended.
 * @param {{send: Function}} res Express-style response; only `send` is used.
 * @returns {Promise<void>} Resolves once the response was sent and the
 *   browser has been closed.
 */
const scrape = async (lnk,res) => {

    const browser = await puppeteer.launch({
        headless: true, // "new" gives error on render
        args: [
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--single-process",
            "--no-zygote",
        ],
        executablePath:
            process.env.NODE_ENV === 'production'
                ? process.env.PUPPETEER_EXECUTABLE_PATH
                : puppeteer.executablePath(),
    });

    // Resource types that are answered with a stub instead of being fetched.
    const skippedResources = /image|imageset|media|stylesheet|font|script/;

    // try/finally so the browser is closed no matter what fails below.
    try {
        const results = [];
        const timeFrames = [1, 5, 15];
        // Last path segment with dashes turned into spaces; same for every time frame.
        const label = lnk.replace(/-/g, ' ').split('/').pop();

        for (const i of timeFrames) {
            const url = `${lnk}?timeFrame=${i * 60}`;
            console.log(url);

            let page;
            try {
                // Inside the try so that a failing newPage() is reported per
                // URL like any other error instead of aborting the request.
                page = await browser.newPage();
                await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36");
                await page.setViewport({width: 1920, height: 1080});

                //skip loading non-essentials
                await page.setRequestInterception(true);
                page.on('request', (req) => {
                    // Another handler already resolved it: touching it again would throw.
                    if (req.isInterceptResolutionHandled()) {
                        return;
                    }
                    const handled = skippedResources.test(req.resourceType())
                        ? req.respond({status: 200, body: 'aborted'})
                        : req.continue();
                    // Requests still in flight when the page closes reject here;
                    // log them rather than leave an unhandled rejection.
                    handled.catch((err) => console.warn(`Request interception failed: ${err.message}`));
                });

                await page.goto(url, {waitUntil: "load", timeout: 7000});

                await page.waitForSelector('section.forecast-box-graph');

                const status = await page.$eval("section.forecast-box-graph .title", el => el.textContent);
                const bankName = await page.$eval("h1.main-title.js-main-title", (el) => el.textContent.trim());

                results.push({
                    bankName,
                    status,
                    lnk: label,
                    url,
                });
            } catch (err) {
                results.push({
                    url,
                    lnk: label,
                    // Error#message is not enumerable, so a raw Error would be
                    // serialised without it; copy the useful fields explicitly.
                    error: {
                        name: err?.name ?? 'Error',
                        message: err?.message ?? String(err),
                    },
                });
            } finally {
                await page?.close();
            }
        }

        res.send(results);
    } finally {
        await browser.close();
    }

};

module.exports = {scrape};

.gitignore

/node_modules
  • 将所有内容推送到新的 github 存储库,
  • 打开 Render,创建新的 Web 服务,连接或添加您的 git 存储库
  • 名称:任意,运行时:应该是 Docker,
  • 点击 Advanced(高级),添加环境变量
    • 键:PUPPETEER_SKIP_CHROMIUM_DOWNLOAD,值:true
    • 键:PUPPETEER_EXECUTABLE_PATH,值:/usr/bin/google-chrome-stable
  • 单击创建 Web 服务
  • 等待部署完成,然后打开 Render 提供的 <URL>,你会看到 "test is running" 消息。
  • 接下来访问 <URL>/scrape?url=https://in.investing.com/equities/axis-bank-technical ,你会得到如下结果:https://i.stack.imgur.com/GuziQ.jpg

注意:

  • waitUntil 可以从 "load" 改为 "networkidle0" 或 "networkidle2",它们都能正常工作。
  • 按照弃用警告的建议把 headless: true 改为 headless: "new",即使在本地运行良好,在 Render 上也会报错。
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

waitForSelector 找不到相关部分 的相关文章

随机推荐