Commit 2b1713cc authored by Barnaby Walters's avatar Barnaby Walters

Fixed merge conflicts

parents ebfcece1 6e97408c
......@@ -4,3 +4,4 @@ composer.phar
/vendor/
/tmp
.idea/
/bin/test
......@@ -69,7 +69,7 @@ function fetch($url, $convertClassic = true, &$curlInfo=null) {
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
$response = curl_exec($ch);
$html = curl_exec($ch);
$info = $curlInfo = curl_getinfo($ch);
curl_close($ch);
......@@ -78,7 +78,6 @@ function fetch($url, $convertClassic = true, &$curlInfo=null) {
return null;
}
$html = mb_substr($response, $info['header_size']);
return parse($html, $url, $convertClassic);
}
......@@ -122,7 +121,8 @@ function unicodeTrim($str) {
* @param string $prefix The prefix to look for
* @return string|array The prefixed name of the first microformats class found or false
*/
function mfNamesFromClass($class, $prefix = 'h-') {
function mfNamesFromClass($class, $prefix='h-') {
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
$classes = explode(' ', $class);
$matches = array();
......@@ -147,9 +147,10 @@ function mfNamesFromClass($class, $prefix = 'h-') {
* @return array
*/
function nestedMfPropertyNamesFromClass($class) {
$prefixes = array(' p-', ' u-', ' dt-', ' e-');
$prefixes = array('p-', 'u-', 'dt-', 'e-');
$propertyNames = array();
$class = str_replace(array(' ', ' ', "\n"), ' ', $class);
foreach (explode(' ', $class) as $classname) {
foreach ($prefixes as $prefix) {
$compare_classname = strtolower(' ' . $classname);
......@@ -192,7 +193,7 @@ function convertTimeFormat($time) {
preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches);
// if no am/pm specified
if ( empty($matches[4]) ) {
if (empty($matches[4])) {
return $time;
}
// else am/pm specified
......@@ -203,31 +204,27 @@ function convertTimeFormat($time) {
$hh = $matches[1];
// add 12 to the pm hours
if ( $meridiem == 'pm' && ($hh < 12) )
{
if ($meridiem == 'pm' && ($hh < 12)) {
$hh += 12;
}
$hh = str_pad($hh, 2, '0', STR_PAD_LEFT);
// minutes
$mm = ( empty($matches[2]) ) ? '00' : $matches[2];
$mm = (empty($matches[2]) ) ? '00' : $matches[2];
// seconds, only if supplied
if ( !empty($matches[3]) )
{
if (!empty($matches[3])) {
$ss = $matches[3];
}
if ( empty($ss) ) {
if (empty($ss)) {
return sprintf('%s:%s', $hh, $mm);
}
else {
return sprintf('%s:%s:%s', $hh, $mm, $ss);
}
}
}
/**
......@@ -294,6 +291,11 @@ class Parser {
}
break;
}
// Ignore <template> elements as per the HTML5 spec
foreach ($this->xpath->query('//template') as $templateEl) {
$templateEl->parentNode->removeChild($templateEl);
}
$this->baseurl = $baseurl;
$this->doc = $doc;
......@@ -321,7 +323,33 @@ class Parser {
return true;
}
/**
 * Pass the href, src and data attributes of every descendant of $el
 * through resolveUrl(), writing the result back onto the element in place.
 *
 * @param DOMElement $el Element whose descendants are processed
 */
private function resolveChildUrls(DOMElement $el) {
    $urlAttributes = array('href', 'src', 'data');
    $candidates = $this->xpath->query('.//*[@src or @href or @data]', $el);

    foreach ($candidates as $node) {
        foreach ($urlAttributes as $attrName) {
            if (!$node->hasAttribute($attrName)) {
                continue;
            }
            $node->setAttribute($attrName, $this->resolveUrl($node->getAttribute($attrName)));
        }
    }
}
/**
 * Return the text content of $el with each descendant <img> replaced by
 * its alt attribute, or by its src attribute when it has no alt.
 *
 * resolveChildUrls() is applied to $el itself first; the <img> substitution
 * is then carried out on a deep clone of $el.
 *
 * @param DOMElement $el
 * @return string
 */
public function textContent(DOMElement $el) {
    $this->resolveChildUrls($el);

    $copy = $el->cloneNode(true);
    $images = $this->xpath->query('.//img', $copy);
    foreach ($images as $image) {
        if ($image->hasAttribute('alt')) {
            $replacementText = $image->getAttribute('alt');
        } else {
            $replacementText = $image->getAttribute('src');
        }
        $image->parentNode->replaceChild($this->doc->createTextNode($replacementText), $image);
    }

    return $copy->textContent;
}
// TODO: figure out if this has problems with sms: and geo: URLs
public function resolveUrl($url) {
// If the URL is seriously malformed it’s probably beyond the scope of this
......@@ -355,7 +383,7 @@ class Parser {
// Process value-class stuff
$val = '';
foreach ($valueClassElements as $el) {
$val .= $el->textContent;
$val .= $this->textContent($el);
}
return unicodeTrim($val);
......@@ -399,7 +427,7 @@ class Parser {
} elseif (in_array($p->tagName, array('data', 'input')) and $p->getAttribute('value') !== '') {
$pValue = $p->getAttribute('value');
} else {
$pValue = unicodeTrim($p->textContent);
$pValue = unicodeTrim($this->textContent($p));
}
return $pValue;
......@@ -434,7 +462,7 @@ class Parser {
} elseif (in_array($u->tagName, array('data', 'input')) and $u->getAttribute('value') !== null) {
return $u->getAttribute('value');
} else {
return unicodeTrim($u->textContent);
return unicodeTrim($this->textContent($u));
}
}
......@@ -596,17 +624,8 @@ class Parser {
// Expand relative URLs within children of this element
// TODO: as it is this is not relative to only children, make this .// and rerun tests
$hyperlinkChildren = $this->xpath->query('//*[@src or @href or @data]', $e);
foreach ($hyperlinkChildren as $child) {
if ($child->hasAttribute('href'))
$child->setAttribute('href', $this->resolveUrl($child->getAttribute('href')));
if ($child->hasAttribute('src'))
$child->setAttribute('src', $this->resolveUrl($child->getAttribute('src')));
if ($child->hasAttribute('data'))
$child->setAttribute('data', $this->resolveUrl($child->getAttribute('data')));
}
$this->resolveChildUrls($e);
$html = '';
foreach ($e->childNodes as $node) {
$html .= $node->C14N();
......@@ -614,7 +633,7 @@ class Parser {
return array(
'html' => $html,
'value' => unicodeTrim($e->textContent)
'value' => unicodeTrim($this->textContent($e))
);
}
......
......@@ -223,7 +223,7 @@ Pull requests very welcome, please try to maintain stylistic, structural and nam
4. Run PHPUnit with `./vendor/bin/phpunit`
5. Make your changes
6. Add PHPUnit tests for your changes, either in an existing test file if suitable, or a new one
7. Make sure your tests pass (`./vendor/bin/phpunit`)
7. Make sure your tests pass (`./vendor/bin/phpunit`), preferably using both PHP 5.3 and 5.4
8. Go to your fork of the repo on github.com and make a pull request, preferably with a short summary, detailed description and references to issues/parsing specs as appropriate
9. Bask in the warm feeling of having contributed to a piece of free software
......@@ -237,6 +237,23 @@ php-mf2 can also be hooked up to the official, cross-platform [microformats2 tes
### Changelog
#### v0.2.8
2014-07-17
* Fixed issue #51 causing php-mf2 to not work with PHP 5.3
* Fixed issue #52 correctly handling the `<template>` element by ignoring it
* Fixed issue #53 improving the plaintext parsing of `<img>` elements
#### v0.2.7
2014-06-18
* Added `Mf2\fetch()` which fetches content from a URL and returns parsed microformats
* Added implied `dt-end` discovery (thanks @gRegorLove)
* Fixed issue causing classnames like `blah e- blah` to produce properties with numeric keys (thanks @aaronpk and @gRegorLove)
* Fixed issue causing resolved URLs to not include port numbers (thanks @aaronpk)
#### v0.2.6
* Added JSON mode as long-term fix for #29
......
......@@ -86,4 +86,20 @@ class ParsePTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('http://example.com', $result['items'][0]['properties']['url'][0]);
}
/**
 * A plaintext p-* value substitutes a nested <img> with its alt text, or
 * with its src when no alt is present; the expected src is the absolute URL.
 *
 * @see https://github.com/indieweb/php-mf2/issues/53
 * @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
 */
public function testConvertsNestedImgElementToAltOrSrc() {
    $baseUrl = 'http://waterpigs.co.uk/articles/five-legged-elephant';
    $markup = <<<EOT
<div class="h-entry">
<p class="p-name">The day I saw a <img alt="five legged elephant" src="/photos/five-legged-elephant.jpg" /></p>
<p class="p-summary">Blah blah <img src="/photos/five-legged-elephant.jpg" /></p>
</div>
EOT;

    $parsed = Mf2\parse($markup, $baseUrl);
    $properties = $parsed['items'][0]['properties'];

    $this->assertEquals('The day I saw a five legged elephant', $properties['name'][0]);
    $this->assertEquals('Blah blah http://waterpigs.co.uk/photos/five-legged-elephant.jpg', $properties['summary'][0]);
}
}
......@@ -30,21 +30,21 @@ class ParserTest extends PHPUnit_Framework_TestCase {
$expected = array('h-card');
$actual = Mf2\mfNamesFromClass('someclass h-card someotherclass', 'h-');
$this->assertEquals($actual, $expected);
$this->assertEquals($expected, $actual);
}
public function testMicroformatNameFromClassHandlesMultipleHNames() {
$expected = array('h-card', 'h-person');
$actual = Mf2\mfNamesFromClass('someclass h-card someotherclass h-person yetanotherclass', 'h-');
$this->assertEquals($actual, $expected);
$this->assertEquals($expected, $actual);
}
public function testMicroformatStripsPrefixFromPropertyClassname() {
$expected = array('name');
$actual = Mf2\mfNamesFromClass('someclass p-name someotherclass', 'p-');
$this->assertEquals($actual, $expected);
$this->assertEquals($expected, $actual);
}
public function testNestedMicroformatPropertyNameWorks() {
......@@ -52,7 +52,29 @@ class ParserTest extends PHPUnit_Framework_TestCase {
$test = 'someclass p-location someotherclass u-author';
$actual = Mf2\nestedMfPropertyNamesFromClass($test);
$this->assertEquals($actual, $expected);
$this->assertEquals($expected, $actual);
}
/**
 * A bare "h-" class with nothing after the prefix must produce no names.
 */
public function testMicroformatNamesFromClassIgnoresPrefixesWithoutNames() {
    $classAttribute = 'someclass h- someotherclass';
    $names = Mf2\mfNamesFromClass($classAttribute, 'h-');
    $this->assertEquals(array(), $names);
}
/**
 * A class string with leading/trailing spaces and a line break between
 * class names is expected to yield exactly array('h-card').
 * The whitespace inside the string literal below is the input under test.
 */
public function testMicroformatNamesFromClassHandlesExcessiveWhitespace() {
$expected = array('h-card');
$actual = Mf2\mfNamesFromClass(' someclass
h-card someotherclass ', 'h-');
$this->assertEquals($expected, $actual);
}
/**
 * An upper-case class name ("H-ENTRY") must not match the lower-case "h-" prefix.
 */
public function testMicroformatNamesFromClassIgnoresUppercaseClassnames() {
    $names = Mf2\mfNamesFromClass('H-ENTRY', 'h-');
    $this->assertEquals(array(), $names);
}
public function testParseE() {
......@@ -71,7 +93,7 @@ class ParserTest extends PHPUnit_Framework_TestCase {
$output = $parser->parse();
$this->assertEquals('Blah blah <a href="http://example.com/a-url">thing</a>. <object data="http://example.com/object"></object> <img src="http://example.com/img"></img>', $output['items'][0]['properties']['content'][0]['html']);
$this->assertEquals('Blah blah thing.', $output['items'][0]['properties']['content'][0]['value']);
$this->assertEquals('Blah blah thing. http://example.com/img', $output['items'][0]['properties']['content'][0]['value']);
}
/**
......@@ -209,9 +231,30 @@ EOT;
$input = '<span class="h-entry"> <span class="e-">foo</span> </span>';
$parser = new Parser($input);
$output = $parser->parse();
// print_r($output);
$this->assertArrayNotHasKey('0', $output['items'][0]['properties']);
}
/**
 * Microformats marked up inside a <template> element must yield no items.
 *
 * @see https://github.com/indieweb/php-mf2/issues/52
 * @see https://github.com/tommorris/mf2py/commit/92740deb7e19b8f1e7fbf6bec001cf52f2b07e99
 */
public function testIgnoresTemplateElements() {
    $markup = '<template class="h-card"><span class="p-name">Tom Morris</span></template>';
    $parsed = Mf2\parse($markup);
    $this->assertCount(0, $parsed['items']);
}
/**
 * The plaintext "value" of an e-* property substitutes a nested <img>
 * with its alt text.
 *
 * @see https://github.com/indieweb/php-mf2/issues/53
 * @see http://microformats.org/wiki/microformats2-parsing#parsing_an_e-_property
 */
public function testConvertsNestedImgElementToAltOrSrc() {
    $markup = <<<EOT
<div class="h-entry">
<p class="e-content">It is a strange thing to see a <img alt="five legged elephant" src="/photos/five-legged-elephant.jpg" /></p>
</div>
EOT;

    $parsed = Mf2\parse($markup, 'http://waterpigs.co.uk/articles/five-legged-elephant');
    $content = $parsed['items'][0]['properties']['content'][0];

    $this->assertEquals('It is a strange thing to see a five legged elephant', $content['value']);
}
}
<p class="adr">665 3rd St. Suite 207 San Francisco, CA 94107 U.S.A.</p>
\ No newline at end of file
{
"items": [{
"type": ["h-adr"],
"properties": {
"name": ["665 3rd St. Suite 207 San Francisco, CA 94107 U.S.A."]
}
}]
}
\ No newline at end of file
{
"name": "Just a name",
"description": "adr",
"author": "Glenn Jones"
}
\ No newline at end of file
<p class="adr">
<span class="street-address">665 3rd St.</span>
<span class="extended-address">Suite 207</span>
<span class="locality">San Francisco</span>,
<span class="region">CA</span>
<span class="postal-code">94107</span>
<span class="country-name">U.S.A.</span>
</p>
\ No newline at end of file
{
"items": [{
"type": ["h-adr"],
"properties": {
"street-address": ["665 3rd St."],
"extended-address": ["Suite 207"],
"locality": ["San Francisco"],
"region": ["CA"],
"postal-code": ["94107"],
"country-name": ["U.S.A."],
"name": ["665 3rd St. Suite 207 San Francisco, CA 94107 U.S.A."]
}
}]
}
\ No newline at end of file
{
"name": "Broken into properties",
"description": "h-adr",
"author": "Glenn Jones"
}
\ No newline at end of file
{
"name": "adr parsing tests",
"description": "This page was designed to test the parsing of adr and its output to the newer JSON structure of microformats 2. These tests are part of the microformats 2 test suite."
}
\ No newline at end of file
<p class="geo">
<abbr class="latitude" title="37.408183">N 37° 24.491</abbr>,
<abbr class="longitude" title="-122.13855">W 122° 08.313</abbr>
</p>
\ No newline at end of file
{
"items": [{
"type": ["h-geo"],
"properties": {
"latitude": ["37.408183"],
"longitude": ["-122.13855"],
"name": ["N 37° 24.491, W 122° 08.313"]
}
}]
}
\ No newline at end of file
{
"name": "The <abbr> tag pattern",
"description": "geo",
"author": "Glenn Jones"
}
\ No newline at end of file
<p>
<span class="geo">The Bricklayer's Arms
<span class="latitude">
<span class="value-title" title="51.513458"> </span>
</span>
<span class="longitude">
<span class="value-title" title="-0.14812"> </span>
</span>
</span>
</p>
\ No newline at end of file
{
"items": [{
"type": ["h-geo"],
"properties": {
"latitude": ["51.513458"],
"longitude": ["-0.14812"],
"name": ["The Bricklayer's Arms"]
}
}]
}
\ No newline at end of file
{
"name": "Hidden value-title pattern",
"description": "geo",
"author": "Glenn Jones"
}
\ No newline at end of file
<p>On my way to The Bricklayer's Arms
(Geo: <span class="geo">51.513458;-0.14812</span>)
</p>
\ No newline at end of file
{
"items": [{
"type": ["h-geo"],
"properties": {
"name": ["51.513458;-0.14812"]
}
}]
}
\ No newline at end of file
{
"name": "Just a name",
"description": "geo",
"author": "Glenn Jones"
}
\ No newline at end of file
We are meeting at
<span class="geo">
<span>The Bricklayer's Arms</span>
(Geo: <span class="p-latitude">51.513458</span>:
<span class="p-longitude">-0.14812</span>)
</span>
\ No newline at end of file
{
"items": [{
"type": ["h-geo"],
"properties": {
"latitude": ["51.513458"],
"longitude": ["-0.14812"],
"name": ["The Bricklayer's Arms (Geo: 51.513458: -0.14812)"]
}
}]
}
\ No newline at end of file
{
"name": "Broken into properties",
"description": "geo",
"author": "Glenn Jones"
}
\ No newline at end of file
{
"name": "geo parsing tests",
"description": "This page was designed to test the parsing of geo and its output to the newer JSON structure of microformats 2. These tests are part of the microformats 2 test suite."
}
\ No newline at end of file
<p>
<span class="geo">
<span class="latitude">
<span class="value-title" title="51.513458">N 51° 51.345</span>,
</span>
<span class="longitude">
<span class="value-title" title="-0.14812">W -0° 14.812</span>
</span>
</span>
</p>
\ No newline at end of file
{
"items": [{
"type": ["h-geo"],
"properties": {
"latitude": ["51.513458"],
"longitude": ["-0.14812"],
"name": ["N 51° 51.345, W -0° 14.812"]
}
}]
}
\ No newline at end of file
{
"name": "Value-title class pattern",
"description": "geo",
"author": "Glenn Jones"
}
\ No newline at end of file
<p class="h-adr">
<span class="p-name">Bricklayer's Arms</span>
<span class="p-label">
<span class="p-street-address">3 Charlotte Road</span>,
<span class="p-locality">City of London</span>,
<span class="p-postal-code">EC2A 3PE</span>,
<span class="p-country-name">UK</span>