let picture = document.querySelector('.c-picture img').src;
if (!document.querySelector('.c-picture img').src) {
let picture = 'No Link'; } //throws error
如果没有图片的话document.querySelector()
返回 null,它没有src
财产。在尝试读取之前,您需要检查查询是否找到了元素src
财产。
将空值检查移至函数顶部还有一个额外的好处,即当您打算退出时可以节省不必要的计算。
async function scrape3() {
// ...
for (let i = 1; i <= 3; i++) {
//...Getting to correct page and scraping it three times
const result = await page.evaluate(() => {
const pictureElement = document.querySelector('.c-picture img');
if (!pictureElement) return null;
const picture = pictureElement.src;
const title = document.querySelector('h1').innerText;
const article = document.querySelector('.c-entry-content').innerText;
const source = "The Verge";
const categories = "Tech";
return {
title,
article,
picture,
source,
categories
}
});
if (!result) continue;
// ... do stuff with result
}
回答评论问题:“有没有办法跳过任何空白内容,然后返回其余内容?”
是的。您只需要在尝试读取其中的属性之前检查可能丢失的每个元素是否存在。在这种情况下,我们可以省略提前返回,因为您始终对所有结果感兴趣。
async function scrape3() {
// ...
for (let i = 1; i <= 3; i++) {
const result = await page.evaluate(() => {
const img = document.querySelector('.c-picture img');
const h1 = document.querySelector('h1');
const content = document.querySelector('.c-entry-content');
const picture = img ? img.src : '';
const title = h1 ? h1.innerText : '';
const article = content ? content.innerText : '';
const source = "The Verge";
const categories = "Tech";
return {
title,
article,
picture,
source,
categories
}
});
// ...
}
}
进一步的想法
由于我仍在讨论这个问题,让我更进一步,并使用您可能感兴趣的一些更高级别的技术对其进行重构。不确定这是否正是您所追求的,但它应该会给您一些帮助关于编写更易于维护的代码的想法。
// Generic reusable helper to return an object property
// if object exists and has property, else a default value
//
// This is a curried function accepting one argument at a
// time and capturing each parameter in a closure.
//
const maybeGetProp = default => key => object =>
(object && object.hasOwnProperty(key)) ? object.key : default
// Pass in empty string as the default value
//
const getPropOrEmptyString = maybeGetProp('')
// Apply the second parameter, the property name, making 2
// slightly different functions which have a default value
// and a property name pre-loaded. Both functions only need
// an object passed in to return either the property if it
// exists or an empty string.
//
const maybeText = getPropOrEmptyString('innerText')
const maybeSrc = getPropOrEmptyString('src')
async function scrape3() {
// ...
// The _ parameter name is acknowledging that we expect a
// an argument passed in but saying we plan to ignore it.
//
const evaluate = _ => page.evaluate(() => {
// Attempt to retrieve the desired elements
//
const img = document.querySelector('.c-picture img');
const h1 = document.querySelector('h1')
const content = document.querySelector('.c-entry-content')
// Return the results, with empty string in
// place of any missing properties.
//
return {
title: maybeText(h1),
article: maybeText(article),
picture: maybeSrc(img),
source: 'The Verge',
categories: 'Tech'
}
}))
// Start with an empty array of length 3
//
const evaluations = Array(3).fill()
// Then map over that array ignoring the undefined
// input and return a promise for a page evaluation
//
.map(evaluate)
// All 3 scrapes are occuring concurrently. We'll
// wait for all of them to finish.
//
const results = await Promise.all(evaluations)
// Now we have an array of results, so we can
// continue using array methods to iterate over them
// or otherwise manipulate or transform them
//
return results
.filter(result => result.title && result.picture)
.forEach(result => {
//
// Do something with each result
//
})
}