...
 
Commits (15)
......@@ -3,7 +3,7 @@
#+OPTIONS: p:nil pri:nil prop:nil stat:t tags:t tasks:t tex:t timestamp:nil title:t toc:5
#+OPTIONS: todo:t |:t
#+TITLE: RDF by Example: rdfpuml for True RDF Diagrams, rdf2rml for R2RML Generation
#+DATE: <2018-12-05>
#+DATE: <2018-12-06>
#+AUTHOR: Vladimir Alexiev
#+EMAIL: vladimir.alexiev@ontotext.com
#+LANGUAGE: en
......@@ -63,14 +63,40 @@ Install the following prerequisites:
- [[https://jena.apache.org/download/][Apache Jena]]: ~riot~, ~update~. Tested with version 3.1.0 of 2016-05-10.
- cat, grep, rm
* TODO Future Work
Help needed for the following tasks:
- Modularize and package better, release on CPAN.
*Please help*. Jonas Smedegaard (dr at jones fullstop dk) has volunteered, so this may happen soon
* TODO
Help needed for the following tasks.
Post bugs and enhancement requests to this repo!
** Done
** Near-term
- Modularize and package better.
- Jonas Smedegaard (dr at jones fullstop dk) has volunteered, so this may happen soon
- Release on CPAN
- Add Unicode tests (ttl with non-ASCII chars: Cyrillic, French, etc)
- Batch a number of ~ttl~ files to one ~puml~ file
- Rationale: plantuml is slow to start up, so putting several diagrams in one file will make things faster:
#+BEGIN_SRC puml
@startuml file1.png
# made from file1.ttl
@enduml
@startuml file2.png
# made from file2.ttl
@enduml
#+END_SRC
- However, this interferes with ~make~ processing that regenerates only ~png~ for changed ~ttl~ files.
So we need a smarter ~Makefile~ that batches up only the changed ~ttl~ for processing.
- ~rdfpuml~ should take multiple input files, and write a single output
- It would also be useful to take a whole folder of ~ttl~ files as input
- Eliminate the dependency of rdfpuml on [[./lib/RDF/Prefixes/Curie.pm]] once [[https://github.com/kasei/perlrdf/issues/131][perlrdf#131]] is fixed
** Mid-Term
- Upgrade to use [[https://github.com/kasei/attean][Attean]] instead of [[https://github.com/kasei/perlrdf][Trine (Perl RDF)]]
- Integrate in Emacs ~org-mode~: write Turtle, see diagram (easy to do)
- Enhance rdfpuml to allow node colors, icons and tooltips (see [[./ideas]])
- Ability to describe custom reification situations using the Property Reification Vocabulary (PRV)
** Long-Term
- Extend rdf2rml to describe & generate RDF Shapes
- Another tool to visualize RDF Shapes (SHACL and ShEx)
- R2RML works great for RDBMS, but how about other sources?
......@@ -78,9 +104,6 @@ Help needed for the following tasks:
- [[http://rml.io][RML:]] extends R2RML to handle RDB, XML, JSON, CSV
- [[http://github.com/semantalytics/xsparql][XSPARQL:]] extends XQuery with SPARQL construct and JSON input
- [[https://tarql.github.io/][tarql]]: handles TSV/CSV with SPARQL construct
- Enhance rdfpuml to allow node colors, icons and tooltips (see [[./ideas]])
Post bugs and enhancement requests to this repo!
* Citation
If you use this software, please cite it
......
......@@ -15,17 +15,42 @@
# if 0;
use strict;
use Encode;
use warnings;
use autodie;
use Getopt::Long 2.24 qw(:config gnu_getopt);
use Pod::Usage;
use Carp::Always; # http://search.cpan.org/~ferreira/Carp-Always-0.13/lib/Carp/Always.pm
# stronger than $Carp::Verbose = 1;
use RDF::Trine;
use RDF::Query;
use Slurp;
use Path::Tiny 0.058;
use FindBin;
use lib "$FindBin::Bin/../lib"; # Curie is my own module, not yet on CPAN
use RDF::Prefixes::Curie;
#use Smart::Comments;
=head1 NAME
rdfpuml - convert RDF to PlantUML diagrams
=head1 SYNOPSIS
B<rdfpuml> [options] F<infile>
=head1 DESCRIPTION
B<rdfpuml> makes true diagrams directly from B<Turtle> examples
using B<PlantUML> and B<GraphViz>.
Diagram readability is of prime concern,
and rdfpuml introduces various diagram control mechanisms
using triples in the B<puml:> namespace.
Special attention is paid to inlining
and visualizing various Reification mechanisms (described with PRV).
=cut
my %PREFIXES =
(
crm => 'http://www.cidoc-crm.org/cidoc-crm/',
......@@ -92,19 +117,71 @@ filter not exists {?re a puml:NoReify}}
SPARQL
## $RE_SPARQL;
my $fname = shift or die "perl rdfpuml <file>: read <file>.ttl, write <file>.puml\n";
$fname =~ s{\.ttl$}{};
=head1 OPTIONS
=over 16
=item B<--infile>
Read from this Turtle file.
=item B<--outfile>
Write to this PlantUML file.
=item B<--prefixfile>
Read Turtle-ish prefixes from this file.
=item B<--help>
Print a brief help message and exit.
=back
=cut
# Collect command-line switches into %opt.  The three file switches each take
# a string value; --help is a plain flag.  If parsing fails, usage is printed
# via pod2usage(2).
my %opt;
my @option_specs = qw( infile=s outfile=s prefixfile=s help );
GetOptions( \%opt, @option_specs ) || pod2usage(2);

# --help was requested: print usage via pod2usage(1).
if ( $opt{help} ) {
    pod2usage(1);
}
=head1 ARGUMENTS
=over 16
=item B<infile>
Read from this Turtle file.
=back
Arguments override options.
E.g. C<rdfpuml --infile foo.ttl bar.ttl> will only read F<bar.ttl>.
Unless B<--outfile> is given, PlantUML output is written to a file named after B<infile>,
with the suffix F<.ttl> removed (if present) and the suffix F<.puml> added.
=cut
# An input file is mandatory, but it may be supplied either as the first
# positional argument or through --infile; the argument wins when both are
# given.  Only complain when neither is present, otherwise --infile on its
# own would be rejected.
pod2usage("$0: Missing input filename.\n")
  unless ( @ARGV or $opt{infile} );
my $infile = path( $ARGV[0] || $opt{infile} );

# Default output name: input file name with a trailing ".ttl" stripped and
# ".puml" appended, unless --outfile overrides it.
# NOTE(review): basename() drops the directory part, so the default output
# lands in the current working directory rather than next to the input --
# confirm this is intended.
my $outfile = path( $opt{outfile} || $infile->basename('.ttl') . '.puml' );

# Wrap in path() so a --prefixfile string also becomes a Path::Tiny object;
# the code that follows calls ->exists and ->slurp_utf8 on it, which would
# die on a plain string.  The default is "prefixes.ttl" beside the input.
my $prefixfile = path( $opt{prefixfile} || $infile->sibling('prefixes.ttl') );
my $prefixes = -e "prefixes.ttl" ? slurp("prefixes.ttl") : "";
my $file = slurp("$fname.ttl");
my $turtle = decode_utf8 "$PREFIXES_TURTLE\n$prefixes\n$file";
my $prefixes_all = decode_utf8 "$PREFIXES_TURTLE\n$prefixes";
open (STDOUT, ">:encoding(UTF-8)", "$fname.puml") or die "can't create $fname.puml: $!\n";
my $prefixes = $prefixfile->exists ? $prefixfile->slurp_utf8 : "";
my $data = $infile->slurp_utf8;
my $turtle = "$PREFIXES_TURTLE\n$prefixes\n$data";
my $prefixes_all = "$PREFIXES_TURTLE\n$prefixes";
open (STDOUT, ">:encoding(UTF-8)", $outfile);
binmode STDERR, ":encoding(UTF-8)";
#print STDERR $turtle; die;
my $store = RDF::Trine::Store::Memory->new();
my $model = RDF::Trine::Model->new($store) or die "can't create model: $!\n";
my $model = RDF::Trine::Model->new($store);
my $parser = RDF::Trine::Parser->new('turtle');
$parser->parse_into_model (undef, $turtle, $model);
my $map = RDF::Prefixes::Curie->new ($prefixes_all);
......@@ -121,7 +198,7 @@ stereotypes();
replace_inlines();
collect_predicate_arrows();
for my $s ($model->subjects(undef,undef)) {
for my $s ( sort { RDF::Trine::Node::compare( $a, $b ) } $model->subjects(undef,undef) ) {
my $s1 = puml_node($s);
my $noReify = print_types ($s, $s1); # types come first
print_relations ($s, $s1, $noReify);
......@@ -133,7 +210,7 @@ myprint ("\@enduml\n");
sub print_types {
my ($s, $s1) = @_;
my @types = map puml_node2($_), $model->objects ($s, U("rdf:type"));
my @types = sort map puml_node2($_), $model->objects ($s, U("rdf:type"));
my $noReify = grep m{puml:NoReify}, @types;
my $types = join ", ", grep !m{puml:NoReify}, @types;
myprint (qq{$s1 : a $types\n}) if $types;
......@@ -142,11 +219,12 @@ sub print_types {
sub print_relations {
my ($s, $s1, $noReify) = @_;
for my $o ($model->objects ($s, undef, undef, type=>'resource'),
for my $o (sort { RDF::Trine::Node::compare( $a, $b ) }
$model->objects ($s, undef, undef, type=>'resource'),
$model->objects ($s, undef, undef, type=>'blank')) {
# collect all relations between the two nodes.
# TODO: also collect inverse relations? Then be careful for reifications!
my @predicates = grep !m{rdf:type}, map puml_predicate($_), $model->predicates ($s, $o);
my @predicates = grep !m{rdf:type}, sort map puml_predicate($_), $model->predicates ($s, $o);
# TODO: remove actually reified predicates (see reification()), not potentially reifiable ($NOREL)
@predicates = grep !m{$NOREL}o, @predicates if !$noReify;
next unless @predicates;
......
#!perl
use strict;
use warnings;
use Test::More tests => 2;
use Test::Script;
# Smoke test, two checks matching the declared plan:
#   1. the script compiles;
#   2. running it with --help terminates with exit status 1.
my $script = 'bin/rdfpuml.pl';
script_compiles($script);
script_runs( [ $script, '--help' ], { exit => 1 } );
done_testing();
#!perl
use strict;
use warnings;
use Test::More tests => 18;
use Test::Script;
use Test::File::Contents;
use Path::Tiny;
# inspect($stem)
#
# Runs bin/rdfpuml.pl on "$stem.ttl" inside a subtest of two checks and
# compares the result with the reference file "$stem.puml".
#
# * Reference exists: output is written to a temporary file, which is then
#   compared against the reference with files_eq_or_diff.
# * Reference missing: output is written straight to the reference path
#   (recreating it) and the subtest records a deliberate failure so the
#   regenerated data does not go unnoticed.
sub inspect {
    my ($stem) = @_;

    my $infile  = "$stem.ttl";
    my $reffile = "$stem.puml";

    # Either a Path::Tiny temp-file object or the plain reference path.
    my $have_reference = path($reffile)->exists;
    my $outfile = $have_reference ? Path::Tiny->tempfile : $reffile;

    subtest "Regenerate $reffile" => sub {
        plan tests => 2;

        my @command = ( 'bin/rdfpuml.pl', '--outfile', "$outfile", $infile );
        script_runs [@command];

        # Stringify before comparing: $outfile may be a Path::Tiny object.
        if ( "$outfile" eq $reffile ) {
            fail "Reference data recreated";
        }
        else {
            files_eq_or_diff "$outfile", $reffile, 'Contents are the same';
        }
    }
}
# To regenerate reference test data, simply delete the obsolete files.

# Fixtures checked against their stored reference output, in this order.
# A lexical loop variable is used so nothing called by inspect() can touch
# the (read-only) list elements through $_.
for my $stem (
    qw(
    test/PCDM/PCDM_Multi_Page_Text
    test/PCDM/PCDM_Multi_Page_Text-circles
    test/TRR/awardWinner
    test/TRR/project-participation
    test/TRR/result-project
    test/exhibitions/exhibition2
    test/exhibitions/exhibitions-out
    test/exhibitions/exhibitions
    test/museum-object/acquisition
    test/museum-object/bibref
    test/museum-object/cast-after
    test/museum-object/dimensions
    test/museum-object/objects
    test/museum-object/painting-support
    test/museum-object/provenance
    test/museum-object/title
    )
  )
{
    inspect($stem);
}

# These fixtures are run as TODO tests with the reason given in $TODO.
TODO: {
    local $TODO = 'Fails to generate deterministically';
    for my $stem (
        qw(
        test/exhibitions/exhibitions.r2rml
        test/museum-object/objects.r2rml
        )
      )
    {
        inspect($stem);
    }
}
done_testing();