Commit 96643a01 authored by Aaron Parecki's avatar Aaron Parecki

Merge branch 'master' of github.com:indieweb/php-mf2

parents b1814f6a 1d6500bb
......@@ -2,4 +2,5 @@
/nbproject
composer.phar
/vendor/
/tmp
\ No newline at end of file
/tmp
.idea/
......@@ -46,6 +46,42 @@ function parse($input, $url = null, $convertClassic = true) {
return $parser->parse($convertClassic);
}
/**
 * Fetch microformats2
 *
 * Given a URL, fetches it (following up to 5 redirects) and, if the content-type appears to be HTML, returns the parsed
 * microformats2 array structure.
 *
 * Note that even if the response code was a 4XX or 5XX error, if the content-type is HTML-like then it will be parsed
 * all the same, as there are legitimate cases where error pages might contain useful microformats (for example a deleted
 * h-entry resulting in a 410 Gone page with a stub h-entry explaining the reason for deletion). Look in $curlInfo['http_code']
 * for the actual value.
 *
 * Relative URLs in the document are resolved against the final URL reported by cURL (i.e. after any redirects),
 * falling back to the requested URL if cURL does not report one.
 *
 * @param string $url The URL to fetch
 * @param bool $convertClassic (optional, default true) whether or not to convert classic microformats
 * @param &array $curlInfo (optional) the results of curl_getinfo will be placed in this variable for debugging
 * @return array|null canonical microformats2 array structure on success, null on failure
 */
function fetch($url, $convertClassic = true, &$curlInfo=null) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Headers are NOT included in the returned string, so the response is the body alone.
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
    $html = curl_exec($ch);
    $info = $curlInfo = curl_getinfo($ch);
    curl_close($ch);

    // curl_exec() returns false when the transfer itself failed; there is nothing to parse.
    if ($html === false) {
        return null;
    }

    // The content type may be absent from the response; treat that the same as non-HTML.
    $contentType = isset($info['content_type']) ? (string) $info['content_type'] : '';
    if (stripos($contentType, 'html') === false) {
        // The content was not delivered as HTML, do not attempt to parse it.
        return null;
    }

    // header_size is deliberately not used to trim $html: with CURLOPT_HEADER disabled the headers
    // were never part of the response, so trimming would cut off the start of the document.

    // Resolve relative URLs against the URL actually served after following redirects.
    $finalUrl = !empty($info['url']) ? $info['url'] : $url;

    return parse($html, $finalUrl, $convertClassic);
}
/**
* Unicode to HTML Entities
* @param string $input String containing characters to convert into HTML entities
......@@ -143,6 +179,54 @@ function nestedMfPropertyNamesFromElement(\DOMElement $e) {
return nestedMfPropertyNamesFromClass($class);
}
/**
 * Converts various time formats to HH:MM
 *
 * Times carrying an am/pm marker (e.g. "9pm", "9:00pm", "9:00:00 p.m." without the space) are converted
 * to 24-hour HH:MM, or HH:MM:SS when seconds were supplied. 12am maps to hour 00 and 12pm stays at 12.
 * Input without an am/pm marker is returned unchanged.
 *
 * @param string $time The time to convert
 * @return string
 */
function convertTimeFormat($time) {
    preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches);

    // No am/pm marker (or no match at all): nothing to convert.
    if (empty($matches[4])) {
        return $time;
    }

    // Normalise "a.m.", "PM", "p.m" etc. to plain "am"/"pm".
    $meridiem = strtolower(str_replace('.', '', $matches[4]));

    $hh = (int) $matches[1];
    if ($meridiem === 'pm' && $hh < 12) {
        // Afternoon/evening hours shift by 12; 12pm is already correct.
        $hh += 12;
    } elseif ($meridiem === 'am' && $hh === 12) {
        // 12am is midnight, i.e. hour 00 on the 24-hour clock.
        $hh = 0;
    }

    // Minutes default to 00 when only the hour was given (e.g. "9pm").
    $mm = empty($matches[2]) ? '00' : $matches[2];

    // Seconds are emitted only if they were supplied.
    if (empty($matches[3])) {
        return sprintf('%02d:%s', $hh, $mm);
    }

    return sprintf('%02d:%s:%s', $hh, $mm, $matches[3]);
}
/**
* Microformats2 Parser
*
......@@ -355,9 +439,10 @@ class Parser {
* Given an element with class="dt-*", get the value of the datetime as a php date object
*
* @param DOMElement $dt The element to parse
* @param array $dates Array of dates processed so far
* @return string The datetime string found
*/
public function parseDT(\DOMElement $dt) {
public function parseDT(\DOMElement $dt, &$dates = array()) {
// Check for value-class pattern
$valueClassChildren = $this->xpath->query('./*[contains(concat(" ", @class, " "), " value ") or contains(concat(" ", @class, " "), " value-title ")]', $dt);
$dtValue = false;
......@@ -408,19 +493,35 @@ class Parser {
foreach ($dateParts as $part) {
// Is this part a full ISO8601 datetime?
if (preg_match('/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(?::\d{2})?(?:Z?[+|-]\d{2}:?\d{2})?$/', $part)) {
// Break completely, we’ve got our value
// Break completely, we’ve got our value.
$dtValue = $part;
break;
} else {
// Is the current part a valid time(+TZ?) AND no other time reprentation has been found?
// Is the current part a valid time(+TZ?) AND no other time representation has been found?
if ((preg_match('/\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $part) or preg_match('/\d{1,2}[a|p]m/', $part)) and empty($timePart)) {
$timePart = $part;
} elseif (preg_match('/\d{4}-\d{2}-\d{2}/', $part) and empty($datePart)) {
// Is the current part a valid date AND no other date reprentation has been found?
// Is the current part a valid date AND no other date representation has been found?
$datePart = $part;
}
$dtValue = rtrim($datePart, 'T') . 'T' . unicodeTrim($timePart, 'T');
if ( !empty($datePart) && !in_array($datePart, $dates) ) {
$dates[] = $datePart;
}
$dtValue = '';
if ( empty($datePart) && !empty($timePart) ) {
$timePart = convertTimeFormat($timePart);
$dtValue = unicodeTrim($timePart, 'T');
}
else if ( !empty($datePart) && empty($timePart) ) {
$dtValue = rtrim($datePart, 'T');
}
else {
$timePart = convertTimeFormat($timePart);
$dtValue = rtrim($datePart, 'T') . 'T' . unicodeTrim($timePart, 'T');
}
}
}
} else {
......@@ -458,6 +559,19 @@ class Parser {
} else {
$dtValue = $dt->nodeValue;
}
if ( preg_match('/(\d{4}-\d{2}-\d{2})/', $dtValue, $matches) ) {
$dates[] = $matches[0];
}
}
/**
* if $dtValue is only a time and there are recently parsed dates,
* form the full date-time using the most recently parsed dt- value
*/
if ( (preg_match('/^\d{1,2}:\d{1,2}(Z?[+|-]\d{2}:?\d{2})?/', $dtValue) or preg_match('/^\d{1,2}[a|p]m/', $dtValue)) && !empty($dates) ) {
$dtValue = convertTimeFormat($dtValue);
$dtValue = end($dates) . 'T' . unicodeTrim($dtValue, 'T');
}
return $dtValue;
......@@ -518,6 +632,7 @@ class Parser {
// Initalise var to store the representation in
$return = array();
$children = array();
$dates = array();
// Handle nested microformats (h-*)
foreach ($this->xpath->query('.//*[contains(concat(" ", @class)," h-")]', $e) as $subMF) {
......@@ -590,7 +705,7 @@ class Parser {
if ($this->isElementParsed($dt, 'dt'))
continue;
$dtValue = $this->parseDT($dt);
$dtValue = $this->parseDT($dt, $dates);
if ($dtValue) {
// Add the value to the array for dt- properties
......
......@@ -13,14 +13,15 @@ You could install it by just downloading `/Mf2/Parser.php` and including that, b
## Usage
mf2 is PSR-0 autoloadable, so all you have to do to load it is:
php-mf2 is PSR-0 autoloadable, so all you have to do to load it is:
1. Include Composer’s auto-generated autoload file (`/vendor/autoload.php`)
1. Call `Mf2\parse()` with the HTML (or a DOMDocument), and optionally the URL to resolve relative URLs against.
1. To fetch microformats from a URL, call `Mf2\fetch($url)`
1. To parse microformats from HTML, call `Mf2\parse($html)`, optionally with a URL as a second parameter to resolve relative URLs against.
## Examples
### Parsing implied microformats2
### Fetching microformats from a page
```php
<?php
......@@ -31,6 +32,21 @@ require '/vendor/autoload.php';
use Mf2;
// (Above code (or equivalent) assumed in future examples)
$mf = Mf2\fetch('http://microformats.org');
foreach ($mf['items'] as $microformat) {
echo "A {$microformat['type'][0]} called {$microformat['properties']['name'][0]}\n";
}
```
### Parsing implied microformats2
```php
<?php
$output = Mf2\parse('<p class="h-card">Barnaby Walters</p>');
```
......@@ -114,12 +130,29 @@ Protip: if you’re not bothered about the microformats2 data and just want rels
```php
<?php
use Mf2;
$parser = new Mf2\Parser('<link rel="…');
$relsAndAlternates = $parser->parseRelsAndAlternates();
```
### Debugging Mf2\fetch
`Mf2\fetch()` will attempt to parse any response served with “HTML” in the content-type, regardless of what the status code is. If it receives a non-HTML response it will return null.
To learn what the HTTP status code for any request was, or learn more about the request, pass a variable name as the third parameter to `Mf2\fetch()` — this will be filled with the contents of `curl_getinfo()`, e.g:
```php
<?php
$mf = Mf2\fetch('http://waterpigs.co.uk/this-page-doesnt-exist', true, $curlInfo);
if ($curlInfo['http_code'] == '404') {
// This page doesn’t exist.
}
```
If it was HTML then it is still parsed, as there are cases where error pages contain microformats — for example a deleted h-entry resulting in a 410 Gone response containing a stub h-entry with an explanation for the deletion.
### Getting more control by creating a Parser object
The `Mf2\parse()` function covers the most common usage patterns by internally creating an instance of `Mf2\Parser` and returning the output all in one step. For some advanced usage you can also create an instance of `Mf2\Parser` yourself.
......@@ -178,13 +211,21 @@ TODO: move this section to a security/consumption best practises page on the wik
## Contributing
Pull requests very welcome, please try to maintain stylistic, structural and naming consistency with the existing codebase, and don’t be too upset if I make naming changes :)
Issues and bug reports are very welcome. If you know how to write tests then please do so as code always expresses problems and intent much better than English, and gives me a way of measuring whether or not fixes have actually solved your problem. If you don’t know how to write tests, don’t worry :) Just include as much useful information in the issue as you can.
Please add tests which cover changes you plan to make or have made. I use PHPUnit, which is the de-facto standard for modern PHP development.
Pull requests very welcome, please try to maintain stylistic, structural and naming consistency with the existing codebase, and don’t be too upset if I make naming changes :)
At the very least, run the test suite before and after making your changes to make sure you haven’t broken anything.
### How to make a Pull Request
Issues/bug reports welcome. If you know how to write tests then please do so as code always expresses problems and intent much better than English, and gives me a way of measuring whether or not fixes have actually solved your problem. If you don’t know how to write tests, don’t worry :) Just include as much useful information in the issue as you can.
1. Fork the repo to your github account
2. Clone a copy to your computer (simply installing php-mf2 using composer only works for using it, not developing it)
3. Install the dev dependencies with `./composer.phar install`
4. Run PHPUnit with `./vendor/bin/phpunit`
5. Make your changes
6. Add PHPUnit tests for your changes, either in an existing test file if suitable, or a new one
7. Make sure your tests pass (`./vendor/bin/phpunit`)
8. Go to your fork of the repo on github.com and make a pull request, preferably with a short summary, detailed description and references to issues/parsing specs as appropriate
9. Bask in the warm feeling of having contributed to a piece of free software
## Testing
......@@ -285,4 +326,4 @@ php-mf2 can also be hooked up to the official, cross-platform [microformats2 tes
#### v0.1.15
* Added html-safe options
* Added rel+rel-alternate parsing
\ No newline at end of file
* Added rel+rel-alternate parsing
......@@ -107,6 +107,5 @@ EOT;
/**
 * Parses the saved snarfed.org article fixture and checks that parsing completes
 * and yields a result containing an 'items' key.
 */
public function testParsesSnarfedOrgArticleCorrectly() {
    $input = file_get_contents(__DIR__ . '/snarfed.org.html');
    $result = Mf2\parse($input, 'http://snarfed.org/2013-10-23_oauth-dropins');

    // Assert on the result instead of dumping it, so the test run stays quiet and the test checks something.
    $this->assertArrayHasKey('items', $result);
}
}
......@@ -167,8 +167,6 @@ class CombinedMicroformatsTest extends PHPUnit_Framework_TestCase {
$parser = new Parser($input, '', true);
$output = $parser->parse();
print_r($output);
$this->assertJsonStringEqualsJsonString(json_encode($output), $expected);
}
......
......@@ -135,7 +135,7 @@ class ParseDTTest extends PHPUnit_Framework_TestCase {
* @group valueClass
*/
public function testAbbrYYYY_MM_DD__HH_MM() {
$input = '<div class="h-event"><span class="dt-start"><abbr class="value" title="2012-10-07">some day</a> at <span class="value">21:18</span></span></div>';
$input = '<div class="h-event"><span class="dt-start"><abbr class="value" title="2012-10-07">some day</abbr> at <span class="value">21:18</span></span></div>';
$parser = new Parser($input);
$output = $parser->parse();
......@@ -155,4 +155,107 @@ class ParseDTTest extends PHPUnit_Framework_TestCase {
$this->assertArrayHasKey('start', $output['items'][0]['properties']);
$this->assertEquals('2012-10-07T21:00', $output['items'][0]['properties']['start'][0]);
}
/**
 * A value-class date followed by a 12-hour "pm" time is expected to produce
 * a 24-hour datetime for dt-start.
 *
 * @group parseDT
 * @group valueClass
 */
public function testYYYY_MM_DD__HH_MMpm() {
    $html = '<div class="h-event"><span class="dt-start"><span class="value">2012-10-07</span> at <span class="value">9:00pm</span></span></div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();
    $properties = $parsed['items'][0]['properties'];

    $this->assertArrayHasKey('start', $properties);
    $this->assertEquals('2012-10-07T21:00', $properties['start'][0]);
}
/**
 * A value-class date followed by a 12-hour "pm" time with seconds is expected
 * to produce a 24-hour datetime that keeps the seconds.
 *
 * @group parseDT
 * @group valueClass
 */
public function testYYYY_MM_DD__HH_MM_SSpm() {
    $html = '<div class="h-event"><span class="dt-start"><span class="value">2012-10-07</span> at <span class="value">9:00:00pm</span></span></div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();
    $properties = $parsed['items'][0]['properties'];

    $this->assertArrayHasKey('start', $properties);
    $this->assertEquals('2012-10-07T21:00:00', $properties['start'][0]);
}
/**
 * A time-only dt-end is expected to take its date from the preceding dt-start.
 * The test name refers to the value-class used within the dt-end.
 *
 * @group parseDT
 * @group valueClass
 */
public function testImpliedDTEndWithValueClass() {
    $html = '<div class="h-event"> <span class="dt-start"><span class="value">2014-06-04</span> at <span class="value">18:30</span> <span class="dt-end"><span class="value">19:30</span></span></span> </div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();
    $properties = $parsed['items'][0]['properties'];

    // Both properties must be present before their values are compared.
    $this->assertArrayHasKey('start', $properties);
    $this->assertArrayHasKey('end', $properties);
    $this->assertEquals('2014-06-04T18:30', $properties['start'][0]);
    $this->assertEquals('2014-06-04T19:30', $properties['end'][0]);
}
/**
 * A time-only dt-end is expected to take its date from the preceding dt-start.
 * The test name refers to the lack of value-class within the dt-end.
 *
 * @group parseDT
 * @group valueClass
 */
public function testImpliedDTEndWithoutValueClass() {
    $html = '<div class="h-event"> <span class="dt-start"><span class="value">2014-06-05</span> at <span class="value">18:31</span> <span class="dt-end">19:31</span></span> </div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();
    $properties = $parsed['items'][0]['properties'];

    // Both properties must be present before their values are compared.
    $this->assertArrayHasKey('start', $properties);
    $this->assertArrayHasKey('end', $properties);
    $this->assertEquals('2014-06-05T18:31', $properties['start'][0]);
    $this->assertEquals('2014-06-05T19:31', $properties['end'][0]);
}
/**
 * A time-only dt-end is expected to take its date from a preceding dt-start that is a
 * plain element holding a full datetime, rather than a value-class pattern.
 *
 * @see https://github.com/indieweb/php-mf2/pull/46
 * @group parseDT
 * @group valueClass
 */
public function testImpliedDTEndUsingNonValueClassDTStart() {
    // The fixture previously ended the dt-end with a second, unmatched </span>; it is removed here
    // so the test exercises well-formed markup instead of relying on the HTML parser's error recovery.
    $input = '<div class="h-event"> <time class="dt-start">2014-06-05T18:31</time> until <span class="dt-end">19:31</span> </div>';
    $parser = new Parser($input);
    $output = $parser->parse();

    $this->assertArrayHasKey('start', $output['items'][0]['properties']);
    $this->assertArrayHasKey('end', $output['items'][0]['properties']);
    $this->assertEquals('2014-06-05T18:31', $output['items'][0]['properties']['start'][0]);
    $this->assertEquals('2014-06-05T19:31', $output['items'][0]['properties']['end'][0]);
}
/**
 * A lone dt-start with value-class date and time parts is expected to be
 * combined into a single datetime.
 *
 * @group parseDT
 * @group valueClass
 */
public function testDTStartOnly() {
    $html = '<div class="h-event"> <span class="dt-start"><span class="value">2014-06-06</span> at <span class="value">18:32</span> </span> </div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();
    $properties = $parsed['items'][0]['properties'];

    $this->assertArrayHasKey('start', $properties);
    $this->assertEquals('2014-06-06T18:32', $properties['start'][0]);
}
/**
 * A dt-start holding only a value-class date is expected to yield the bare
 * date, with no time portion appended.
 *
 * @group parseDT
 * @group valueClass
 */
public function testDTStartDateOnly() {
    $html = '<div class="h-event"> <span class="dt-start"><span class="value">2014-06-07</span> </span> </div>';

    $mfParser = new Parser($html);
    $parsed = $mfParser->parse();
    $properties = $parsed['items'][0]['properties'];

    $this->assertArrayHasKey('start', $properties);
    $this->assertEquals('2014-06-07', $properties['start'][0]);
}
}
......@@ -189,4 +189,16 @@ EOT;
$this->assertCount(1, $output['items'][0]['properties']['in-reply-to']);
$this->assertEquals('Name', $output['items'][0]['properties']['in-reply-to'][0]['properties']['name'][0]);
}
/**
 * Exercises Mf2\fetch() against live URLs: an HTML page is expected to produce a parsed
 * result with an 'items' key, while a JPEG is expected to produce null and report a
 * jpeg content type through the curl info out-parameter.
 *
 * @group network
 */
public function testFetchMicroformats() {
    // HTML response: parsed into a microformats structure.
    $pageResult = Mf2\fetch('http://waterpigs.co.uk/');
    $this->assertArrayHasKey('items', $pageResult);

    // Non-HTML response: nothing is parsed, but the curl info is still filled in.
    $imageResult = Mf2\fetch('http://waterpigs.co.uk/photo.jpg', null, $responseInfo);
    $this->assertNull($imageResult);
    $this->assertContains('jpeg', $responseInfo['content_type']);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment