I am trying to extract all image URLs from an HTML string using regex /<img.*?src="(.*?)"[^>]+>/g
in a function like this:
function getImages(string) {
const imgRex = /<img.*?src="(.*?)"[^>]+>/g;
const images = [];
let img;
while ((img = imgRex.exec(string))) {
images.push(img[1]);
}
return images;
}
However, the results also contain non-image stuff, e.g.:
[
'https://www.facebook.com/tr?id=900220307025564&ev=PageView&noscript=1',
'https://cyclingmagazine.ca/wp-content/uploads/2020/10/Peloton-Bike_Cam-1200x675.jpg',
'https://cyclingmagazine.ca/wp-content/uploads/2020/10/ontario-creates-logo.png',
'data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"></div><div id=',
'https://cyclingmagazine.ca/wp-content/uploads/2019/03/cycling-pop-up.jpg'
]
which breaks subsequent execution. I am by no means a regex expert (clearly), would appreciate any help!
Do not use regex for this. Use the DOMParser API and its parseFromString() method.
let str = "<img src='https://www.example.com'>";
let DOMParsing = new DOMParser()
let parsed = DOMParsing.parseFromString(str, "text/html")
// Now you can use querySelector to target the wanted element
// or querySelectorAll and a loop for multiple elements
let imgURL = parsed.querySelector("img").src
console.log(imgURL)