Parse entire HTML document from string into jQuery
It does not work because jQuery needs a DOM node in which to search for the 'title' tags. As you noted, you need to parse the HTML text first.
From here and here, the solution is to parse the string and append it to a temporary div (or other element):
// Create a new <div> wrapper, then append the nodes parsed from `str` to it.
var tempDom = $('<div></div>');
tempDom.append($.parseHTML(str));
Then, you can manipulate tempDom
to find elements.
Working demo: http://codepen.io/anon/pen/wKwLMP
TL;DR ... use the DOMParser
API
// Sample HTML document to parse.
var htmlString = "<html><head><title>Name</title></head><body><div class='content'>Hello</div></body></html>";
// Use "text/html" so the HTML parser is used. "text/xml" invokes the XML
// parser, which only accepts well-formed XML and produces a <parsererror>
// document for ordinary HTML (unclosed void tags, named entities, ...).
var htmlDoc = (new DOMParser()).parseFromString(htmlString, "text/html");
Unfortunately, the current answers miss a lot of edge cases
You should not use $.parseHTML(htmlString)
as it's immediately lossy. If we check the source code of $.parseHTML
, it'll call buildFragment
which creates a temporary DOM element and sets the innerHTML
property.
Element.innerHTML
provides an API for:
- Parsing (string -> DOM) in the write operation
- Serializing (DOM -> string) in the read operation
And here's the spec for the HTML fragment parsing algorithm
Taking a sample string, here's the result of trying various HTML Parsing approaches:
// Sample document: a full <html> tree with a <title> and one ".content" div.
var htmlString =
  "<html><head><title>Name</title></head>" +
  "<body><div class='content'>Hello</div></body></html>";
/**
 * Runs the sample document through five HTML-parsing approaches and, for each
 * one, logs the resulting DOM, its serialized HTML, and the text found for
 * <title> and for the element with class "content".
 *
 * Reads the global `htmlString` and reports through `console.LogOutput`.
 * Requires a browser environment (document, DOMParser) and jQuery as `$`.
 */
function ParseHtmlTests() {
  /*** $.parseHTML ***/
  var $parseHtml = $.parseHTML(htmlString);
  console.LogOutput(
    '1. $.parseHTML',
    $parseHtml,
    $parseHtml.map(function(el) { return el.outerHTML; }),
    $($parseHtml).find("title").text(),
    $($parseHtml).find(".content").text()
  );

  /*** tempDiv.innerHTML ***/
  var tempDiv = document.createElement("div");
  tempDiv.innerHTML = htmlString;
  console.LogOutput(
    '2. tempDiv.innerHTML',
    tempDiv,
    tempDiv.outerHTML,
    $(tempDiv).find("title").text(),
    $(tempDiv).find(".content").text()
  );

  /*** divAppendContents ***/
  var $divAppendContents = $('<div></div>').append(htmlString);
  console.LogOutput(
    '3. divAppendContents',
    $divAppendContents,
    $divAppendContents.html(),
    $divAppendContents.find("title").text(),
    $divAppendContents.find(".content").text()
  );

  /*** tempHtml.innerHTML ***/
  var tmpHtml = document.createElement('html');
  tmpHtml.innerHTML = htmlString;
  console.LogOutput(
    '4. tempHtml.innerHTML',
    tmpHtml,
    tmpHtml.outerHTML,
    tmpHtml.getElementsByTagName('title')[0].innerText,
    tmpHtml.getElementsByClassName('content')[0].innerText
  );

  /*** DOMParser.parseFromString ***/
  // "text/html" selects the HTML parser. "text/xml" would run the XML parser,
  // which rejects ordinary non-well-formed HTML with a <parsererror> document.
  var htmlDoc = (new DOMParser()).parseFromString(htmlString, "text/html");
  console.LogOutput(
    '5. DOMParser.parseFromString',
    htmlDoc,
    htmlDoc.documentElement.outerHTML,
    htmlDoc.documentElement.getElementsByTagName('title')[0].innerHTML,
    htmlDoc.documentElement.getElementsByClassName('content')[0].innerHTML
  );
}
/*** Create Console Log Methods ***/
// Keep the native grouping methods when the console provides them; otherwise
// fall back to plain log lines so the output is still delimited.
console.group = console.group || function (label) {
  console.log(label);
};
console.groupEnd = console.groupEnd || function (ignoredLabel) {
  console.log("----------------------------");
};
/**
 * Logs one parsing result as a console group.
 *
 * @param method  Label used as the group heading.
 * @param dom     Value logged after "DOM:".
 * @param html    Value logged after "HTML:".
 * @param title   Value logged after "Title:".
 * @param content Value logged after "Content:".
 */
console.LogOutput = function (method, dom, html, title, content) {
  // Label/value pairs, in the order they are printed.
  var rows = [
    ["DOM:", dom],
    ["HTML:", html],
    ["Title:", title],
    ["Content:", content]
  ];

  console.group(method);
  rows.forEach(function (row) {
    console.log(row[0], row[1]);
  });
  console.groupEnd();
};
/*** Run all parsing comparisons ***/
ParseHtmlTests();
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.js"></script>
And here's the output from the above script in Chrome:
The best approach seems to be creating an HTML root object by setting the innerHTML
of a temporary HTML document or by using the DOMParser
API
Further Reading:
- Parse an HTML string with JS
- Parsing of html string using jquery
- jQuery not finding elements in
jQuery.parseHTML()