Commit 7c2ca77a, authored by Aaron Parecki, committed by GitHub

Merge pull request #97 from gRegorLove/i18n

Experimental language parsing
parents 034ca67b 20f618a7
......@@ -452,6 +452,35 @@ class Parser {
return ($out === '') ? NULL : $out;
}
/**
 * Determine the language that applies to an element.
 *
 * Starting at $el, the nearest `lang` attribute on the element itself or on
 * one of its ancestors wins. If the walk reaches the <html> element without
 * finding one, the document's <meta http-equiv="Content-Language"> value is
 * used instead. When nothing is found, an empty string is returned.
 *
 * The ancestor walk is iterative and stops as soon as the parent is not an
 * element (the owning document, or null for a detached node), so trees with
 * no <html> ancestor yield '' instead of passing a non-element to a
 * DOMElement-typed parameter.
 *
 * @param DOMElement $el Element whose language should be resolved
 * @access public
 * @return string Trimmed language value, or '' when none can be determined
 */
public function language(DOMElement $el)
{
	$current = $el;

	while ($current instanceof DOMElement) {
		// An explicit lang attribute always takes precedence
		if ($current->hasAttribute('lang')) {
			return unicodeTrim($current->getAttribute('lang'));
		}

		if ($current->tagName === 'html') {
			// At <html> with no lang: fall back to <meta http-equiv="Content-Language">.
			// The XPath predicate already guarantees the http-equiv attribute exists.
			foreach ($this->xpath->query('.//meta[@http-equiv]') as $meta) {
				if ($meta->hasAttribute('content') && strtolower($meta->getAttribute('http-equiv')) === 'content-language') {
					return unicodeTrim($meta->getAttribute('content'));
				}
			}

			return '';
		}

		// Keep climbing; the loop ends if the parent is a DOMDocument or null
		$current = $current->parentNode;
	}

	return '';
} # end method language()
// TODO: figure out if this has problems with sms: and geo: URLs
public function resolveUrl($url) {
// If the URL is seriously malformed it’s probably beyond the scope of this
......@@ -740,10 +769,17 @@ class Parser {
$html .= $node->ownerDocument->saveHTML($node);
}
return array(
$return = array(
'html' => $html,
'value' => unicodeTrim($this->innerText($e))
'value' => unicodeTrim($this->innerText($e)),
);
// Language
if ( $html_lang = $this->language($e) ) {
$return['html-lang'] = $html_lang;
}
return $return;
}
private function removeTags(\DOMElement &$e, $tagName) {
......@@ -1001,6 +1037,11 @@ class Parser {
$return['url'][] = $this->resolveUrl($url);
}
// Language
if ( $html_lang = $this->language($e) ) {
$return['html-lang'] = $html_lang;
}
// Make sure things are in alphabetical order
sort($mfTypes);
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment