Commit 9dbb9910 authored by Gregor Morrill

Added method to parse language from HTML as 'html-lang' key + tests for #96.

parent 0ccc493b
......@@ -451,6 +451,35 @@ class Parser {
return ($out === '') ? NULL : $out;
}
/**
 * Determine the language that applies to an element.
 *
 * Resolution order:
 *  1. the nearest `lang` attribute, looking at the element itself and then
 *     at each ancestor element in turn;
 *  2. if the <html> element is reached and it has no `lang` attribute, the
 *     first <meta http-equiv="Content-Language" content="..."> returned by
 *     the XPath query;
 *  3. otherwise an empty string.
 *
 * @param DOMElement $el
 * @access public
 * @return string The trimmed language value, or '' when none can be determined
 */
public function language(DOMElement $el)
{
    $node = $el;

    // Walk up through ancestor *elements* only. The parent of the top-most
    // element is not a DOMElement (it is the document, or null for a detached
    // node), so stopping here avoids handing a non-element to code that
    // requires a DOMElement when the root element is not <html>.
    while ($node instanceof DOMElement) {
        // element has a lang attribute; use it
        if ($node->hasAttribute('lang')) {
            return trim($node->getAttribute('lang'));
        }

        if ($node->tagName === 'html') {
            // we're at the <html> element and no lang; check <meta> http-equiv Content-Language.
            // The XPath predicate already guarantees the http-equiv attribute is present.
            foreach ($this->xpath->query('.//meta[@http-equiv]') as $meta) {
                if ($meta->hasAttribute('content') && strtolower($meta->getAttribute('http-equiv')) === 'content-language') {
                    return trim($meta->getAttribute('content'));
                }
            }

            // <html> is the end of the search; nothing above it is consulted.
            return '';
        }

        // check the parent node
        $node = $node->parentNode;
    }

    return '';
} # end method language()
// TODO: figure out if this has problems with sms: and geo: URLs
public function resolveUrl($url) {
// If the URL is seriously malformed it’s probably beyond the scope of this
......@@ -741,7 +770,8 @@ class Parser {
return array(
'html' => $html,
'value' => unicodeTrim($this->innerText($e))
'value' => unicodeTrim($this->innerText($e)),
'html-lang' => $this->language($e)
);
}
......@@ -1000,6 +1030,9 @@ class Parser {
$return['url'][] = $this->resolveUrl($url);
}
// Language
$return['html-lang'] = $this->language($e);
// Make sure things are in alphabetical order
sort($mfTypes);
......
<?php
/**
* Tests of the language parsing methods within mf2\Parser
*/
namespace Mf2\Parser\Test;
use Mf2\Parser;
use Mf2;
use PHPUnit_Framework_TestCase;
/**
 * Checks the 'html-lang' value that the parser reports in each item's
 * 'properties' array for a variety of lang / Content-Language markups.
 */
class ParseLanguageTest extends PHPUnit_Framework_TestCase {
    public function setUp() {
        date_default_timezone_set('Europe/London');
    }

    /**
     * Parse an HTML string and return the top-level 'items' of the result.
     *
     * @param string $html markup to feed to the parser
     * @return array the 'items' entry of the parse result
     */
    private function parseItems($html)
    {
        $parser = new Parser($html);
        $result = $parser->parse();

        return $result['items'];
    } # end method parseItems()

    /**
     * Test with only <html lang>
     */
    public function testHtmlLangOnly()
    {
        $items = $this->parseItems('<html lang="en"> <div class="h-entry">This test is in English.</div> </html>');

        $this->assertSame('en', $items[0]['properties']['html-lang']);
    } # end method testHtmlLangOnly()

    /**
     * Test with only h-entry lang
     */
    public function testHEntryLangOnly()
    {
        $items = $this->parseItems('<html> <div class="h-entry" lang="en">This test is in English.</div> </html>');

        $this->assertSame('en', $items[0]['properties']['html-lang']);
    } # end method testHEntryLangOnly()

    /**
     * Test with different <html lang> and h-entry lang; the h-entry's own
     * lang is the one expected.
     */
    public function testHtmlAndHEntryLang()
    {
        $items = $this->parseItems('<html lang="en"> <div class="h-entry" lang="es">Esta prueba está en español.</div> </html>');

        $this->assertSame('es', $items[0]['properties']['html-lang']);
    } # end method testHtmlAndHEntryLang()

    /**
     * Test with <html lang>, one h-entry without lang (which should inherit
     * from the <html lang>), and one h-entry with its own lang.
     */
    public function testMultiLanguageInheritance()
    {
        $items = $this->parseItems('<html lang="en"> <div class="h-entry">This test is in English.</div> <div class="h-entry" lang="es">Esta prueba está en español.</div> </html>');

        $this->assertSame('en', $items[0]['properties']['html-lang']);
        $this->assertSame('es', $items[1]['properties']['html-lang']);
    } # end method testMultiLanguageInheritance()

    /**
     * Test feed with .h-feed lang which contains multiple h-entries of different languages
     * (or none specified); an h-entry without lang should inherit from the .h-feed lang.
     */
    public function testMultiLanguageFeed()
    {
        // The h-feed <div> is closed explicitly so the fixture is well-formed
        // and the nesting under test does not depend on parser error recovery.
        $items = $this->parseItems('<html> <div class="h-feed" lang="en"> <h1 class="p-name">Test Feed</h1> <div class="h-entry">This test is in English.</div> <div class="h-entry" lang="es">Esta prueba está en español.</div> <div class="h-entry" lang="fr">Ce test est en français.</div> </div> </html>');

        $feed = $items[0];

        $this->assertSame('en', $feed['properties']['html-lang']);
        $this->assertSame('en', $feed['children'][0]['properties']['html-lang']);
        $this->assertSame('es', $feed['children'][1]['properties']['html-lang']);
        $this->assertSame('fr', $feed['children'][2]['properties']['html-lang']);
    } # end method testMultiLanguageFeed()

    /**
     * Test with language specified in <meta> http-equiv Content-Language
     */
    public function testMetaContentLanguage()
    {
        $items = $this->parseItems('<html> <meta http-equiv="Content-Language" content="es"/> <div class="h-entry">Esta prueba está en español.</div> </html>');

        $this->assertSame('es', $items[0]['properties']['html-lang']);
    } # end method testMetaContentLanguage()
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment