I have the code I obtained with some help and it can be found at this link HERE. It works well in principle, but the problem is that it returns links and arrays for each page separately. However, I want it to return links in a single array for all pages together. I need to create a string var all_links = []
and push all the links from each page into it all_links.push(...links)
and then return them like return all_links
But the problem is that I’m failing because I don’t know exactly how to do it in this case. I am a pure beginner with no prior knowledge in coding
const puppeteer = require('puppeteer')
const minDiscount = 20;
async function getLinks() {
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null,
});
const page = await browser.newPage();
const url = 'https://www.mytoys.de/spielzeug-spiele/holz/';
await page.goto(url);
// getting all the products, this will return an array of ElementHandle
while(await page.$(".pager__link--next")){
await page.waitForSelector(".pager__link--next")
await page.waitForTimeout(1000);
await page.click('.pager__link--next')
await page.waitForTimeout(1500);
const products = await page.$$('.prod-grid.js-prod-grid .prod-grid__item.js-prod-grid_item');
const proms = await Promise.allSettled(
products.map(async (prod) => {
// searching for a discount on each product
const disc = await prod.$$eval(
'.prod-grid.js-prod-grid .prod-flag.prod-flag-sale',
(discount) =>
discount.map((discItem) =>
discItem.innerText.replace(/[^0-9.]/g, '').replace(/\D+/g,'0')
)
);
// if it has a discount
if (disc.length > 0) {
// we parse the discount to Integer type to compare it to minDiscount
const discountInt = parseInt(disc[0], 10);
if (discountInt >= minDiscount) {
// we get the link of the product
const link = await prod.$$eval('.prod-grid.js-prod-grid .prod-tile__link.js-prodlink', (allAs) => allAs.map((a) => a.href));
if (link.length > 0) {
// push an object containing the discount and the link of the product
return link[0];
}
}
}
return null;
})
);
const bulkArray = proms.map((item) => {
if (item.status === 'fulfilled') return item.value;
});
const endArray = bulkArray.filter(item => item !== null);
console.log(endArray);
}
}
getLinks();
Please help and be merciful as I am just starting to learn the basics
Here is a cleanup to get you started:
const products = await page.$$eval('.prod-tile', divs => divs.map(div => {
return {
url: div.querySelector('a')?.href
discount: div.querySelector('.prod-flag-sale')?.innerText
}
}))
At this point just follow the next links and do the same thing for each page.