Commit 6ffd9846 authored by Barnaby Walters

Implemented <img> plaintext substitution, closed #53

parent 54d20837
......@@ -321,7 +321,33 @@ class Parser {
return true;
}
/**
 * Rewrites the URL-bearing attributes of every descendant of an element.
 *
 * Each descendant of $el that carries an href, src or data attribute has that
 * attribute replaced by the result of passing its current value through
 * resolveUrl(). The XPath is relative (.//), so only nodes below $el are
 * matched. The nodes are modified in place.
 *
 * @param DOMElement $el Element whose descendants are processed.
 */
private function resolveChildUrls(DOMElement $el) {
    // Processed in this order for each matching node.
    $urlAttributes = array('href', 'src', 'data');
    $nodes = $this->xpath->query('.//*[@src or @href or @data]', $el);

    foreach ($nodes as $node) {
        foreach ($urlAttributes as $attribute) {
            // A node matches the query with any one of the three attributes,
            // so each attribute still has to be checked individually.
            if (!$node->hasAttribute($attribute)) {
                continue;
            }

            $resolved = $this->resolveUrl($node->getAttribute($attribute));
            $node->setAttribute($attribute, $resolved);
        }
    }
}
/**
 * Returns the text content of an element with nested <img> elements
 * replaced by plain text.
 *
 * Every <img> below $el contributes its alt attribute when that attribute is
 * present (even if empty), otherwise its src attribute. The substitution is
 * carried out on a deep clone, so no <img> is removed from $el itself.
 * Note that resolveChildUrls() is applied to $el before cloning, so the
 * href/src/data attributes of its descendants are rewritten in place.
 *
 * @param DOMElement $el Element to extract text from.
 * @return string Text content of the clone after <img> substitution (untrimmed).
 */
public function textContent(DOMElement $el) {
    // Run before cloning so that the copy inherits the rewritten src values.
    $this->resolveChildUrls($el);

    $copy = $el->cloneNode(true);
    $images = $this->xpath->query('.//img', $copy);

    foreach ($images as $image) {
        if ($image->hasAttribute('alt')) {
            $replacementText = $image->getAttribute('alt');
        } else {
            $replacementText = $image->getAttribute('src');
        }

        $textNode = $this->doc->createTextNode($replacementText);
        $image->parentNode->replaceChild($textNode, $image);
    }

    return $copy->textContent;
}
// TODO: figure out if this has problems with sms: and geo: URLs
public function resolveUrl($url) {
// If the URL is seriously malformed it’s probably beyond the scope of this
......@@ -355,7 +381,7 @@ class Parser {
// Process value-class stuff
$val = '';
foreach ($valueClassElements as $el) {
$val .= $el->textContent;
$val .= $this->textContent($el);
}
return unicodeTrim($val);
......@@ -399,7 +425,7 @@ class Parser {
} elseif (in_array($p->tagName, array('data', 'input')) and $p->getAttribute('value') !== '') {
$pValue = $p->getAttribute('value');
} else {
$pValue = unicodeTrim($p->textContent);
$pValue = unicodeTrim($this->textContent($p));
}
return $pValue;
......@@ -434,7 +460,7 @@ class Parser {
} elseif (in_array($u->tagName, array('data', 'input')) and $u->getAttribute('value') !== null) {
return $u->getAttribute('value');
} else {
return unicodeTrim($u->textContent);
return unicodeTrim($this->textContent($u));
}
}
......@@ -596,17 +622,8 @@ class Parser {
// Expand relative URLs within children of this element
// TODO: as it is this is not relative to only children, make this .// and rerun tests
$hyperlinkChildren = $this->xpath->query('//*[@src or @href or @data]', $e);
foreach ($hyperlinkChildren as $child) {
if ($child->hasAttribute('href'))
$child->setAttribute('href', $this->resolveUrl($child->getAttribute('href')));
if ($child->hasAttribute('src'))
$child->setAttribute('src', $this->resolveUrl($child->getAttribute('src')));
if ($child->hasAttribute('data'))
$child->setAttribute('data', $this->resolveUrl($child->getAttribute('data')));
}
$this->resolveChildUrls($e);
$html = '';
foreach ($e->childNodes as $node) {
$html .= $node->C14N();
......@@ -614,7 +631,7 @@ class Parser {
return array(
'html' => $html,
'value' => unicodeTrim($e->textContent)
'value' => unicodeTrim($this->textContent($e))
);
}
......
......@@ -94,12 +94,12 @@ class ParsePTest extends PHPUnit_Framework_TestCase {
$input = <<<EOT
<div class="h-entry">
<p class="p-name">The day I saw a <img alt="five legged elephant" src="/photos/five-legged-elephant.jpg" /></p>
<p class="p-summary">Blah blah <img alt="" src="/photos/five-legged-elephant.jpg" /></p>
<p class="p-summary">Blah blah <img src="/photos/five-legged-elephant.jpg" /></p>
</div>
EOT;
$result = Mf2\parse($input, 'http://waterpigs.co.uk/articles/five-legged-elephant');
$this->assertEquals('The day I saw a five legged elephant', $result['items'][0]['properties']['name'][0]);
$this->assertEquals('Blah blah blah http://waterpigs.co.uk/photos/five-legged-elephant.jpg', $result['items'][0]['properties']['summary'][0]);
$this->assertEquals('Blah blah http://waterpigs.co.uk/photos/five-legged-elephant.jpg', $result['items'][0]['properties']['summary'][0]);
}
}
......@@ -93,7 +93,7 @@ class ParserTest extends PHPUnit_Framework_TestCase {
$output = $parser->parse();
$this->assertEquals('Blah blah <a href="http://example.com/a-url">thing</a>. <object data="http://example.com/object"></object> <img src="http://example.com/img"></img>', $output['items'][0]['properties']['content'][0]['html']);
$this->assertEquals('Blah blah thing.', $output['items'][0]['properties']['content'][0]['value']);
$this->assertEquals('Blah blah thing. http://example.com/img', $output['items'][0]['properties']['content'][0]['value']);
}
/**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment