Retrieve elements with xpath and DOMDocument
I managed to get what I needed with this code (based on Khue Vu's code):
// Parse the HTML held in $ads (defined earlier) and pull the URL, title and
// image source out of every <a> element it contains.
$d = new DOMDocument();
// Real-world HTML is rarely well-formed: buffer libxml's parse warnings
// instead of emitting them, then restore whatever setting was active before.
$previous_libxml_setting = libxml_use_internal_errors(true);
// loadHTML() rejects an empty string (ValueError on PHP 8, a warning before),
// so only parse when there is actually something to parse.
if ((string) $ads !== '') {
    $d->loadHTML($ads); // the variable $ads contains the HTML code above
}
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

$xpath = new DOMXPath($d);
// Each <a> element is treated as one ad.
$ls_ads = $xpath->query('//a');
foreach ($ls_ads as $ad) {
    // get ad url
    $ad_url = $ad->getAttribute('href');
    // Query relative to the current <a> (leading "." plus $ad as the context
    // node) rather than cloning it into a throw-away DOMDocument. XPath's
    // string() yields the text of the first match, or '' when nothing
    // matches, so an ad without a title or image no longer dereferences null.
    // get ad title
    $ad_title = trim($xpath->evaluate("string(.//div[@class='title'])", $ad));
    // get ad image
    $ad_image = $xpath->evaluate('string(.//img/@src)', $ad);
    // $ad_url, $ad_title and $ad_image are ready to use for this ad here.
}
For other elements, you just do the same:
// For every ad (<a> node) in $ls_ads: print its URL, then copy the node into
// its own document so the "//..." XPath queries only see that ad's content.
foreach ($ls_ads as $ad) {
    $ad_url = $ad->getAttribute('href');
    print("AD URL : $ad_url");
    $ad_Doc = new DOMDocument();
    // A freshly created DOMDocument has no documentElement yet (it is null),
    // so the imported node must be appended to the document itself. The
    // second argument requests a deep import; without it only the bare <a>
    // is copied and the <img>/<div> children queried below would be missing.
    $ad_Doc->appendChild($ad_Doc->importNode($ad, true));
    $xpath = new DOMXPath($ad_Doc);
    // Both results are DOMNodeList objects; read ->item(0) (after checking
    // ->length) to get at the first match.
    $img_src = $xpath->query("//img[@src]");
    $title = $xpath->query("//div[@class='title']");
}