summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2020-03-30 11:25:06 +0200
committerYorhel <git@yorhel.nl>2020-03-30 11:25:08 +0200
commitab2fadc4a7e18eb904948b51f47e5d738c35c53e (patch)
treecbd41c3d15968af8096c00ba997079bf2f50f1ce
parent97fa5d2a78f715297879381d32fd594d20761fe8 (diff)
Producers::Graph: Experiment with dynamically generating relation graphs
The producers.rgraph column still exists and the old graphs are still being generated - that will be removed if this new approach works out.
-rw-r--r--README.md4
-rw-r--r--lib/Multi/RG.pm5
-rw-r--r--lib/VNDB/Config.pm1
-rw-r--r--lib/VNDB/Handler/Producers.pm22
-rw-r--r--lib/VNWeb/Producers/Graph.pm162
5 files changed, 170 insertions, 24 deletions
diff --git a/README.md b/README.md
index 61c76c0f..fe1f59ab 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,7 @@ Global requirements:
**Perl modules** (core modules are not listed):
General:
+- AnyEvent
- Crypt::ScryptKDF
- Crypt::URandom
- DBD::Pg
@@ -59,6 +60,7 @@ General:
- Image::Magick
- JSON::XS
- PerlIO::gzip
+- graphviz (/usr/bin/dot is used by default)
util/vndb.pl (the web backend):
- Algorithm::Diff::XS
@@ -68,12 +70,10 @@ util/vndb.pl (the web backend):
- HTTP::Server::Simple
util/multi.pl (application server, optional):
-- AnyEvent
- AnyEvent::HTTP
- AnyEvent::IRC
- AnyEvent::Pg
- XML::Parser
-- graphviz (/usr/bin/dot is used by default)
## Manual setup
diff --git a/lib/Multi/RG.pm b/lib/Multi/RG.pm
index 0a6039a3..57722c9c 100644
--- a/lib/Multi/RG.pm
+++ b/lib/Multi/RG.pm
@@ -3,6 +3,11 @@
# Multi::RG - Relation graph generator
#
+# XXX: The producer graphs generated here are not used at the moment, that's
+# now done on demand in VNWeb::Producers::Graph. If that's successful, the same
+# approach will be applied to VN graphs and this module can be removed
+# entirely.
+
package Multi::RG;
use strict;
diff --git a/lib/VNDB/Config.pm b/lib/VNDB/Config.pm
index 11f1822a..8627cf25 100644
--- a/lib/VNDB/Config.pm
+++ b/lib/VNDB/Config.pm
@@ -26,6 +26,7 @@ my $config = {
login_throttle => [ 24*3600/10, 24*3600 ], # interval between attempts, max burst (10 a day)
board_edit_time => 7*24*3600, # Time after which posts become immutable
poll_options => 20, # max number of options in discussion board polls
+ graphviz_path => '/usr/bin/dot',
engines => [ grep $_, split /\s*\n\s*/, q{
BGI/Ethornell
diff --git a/lib/VNDB/Handler/Producers.pm b/lib/VNDB/Handler/Producers.pm
index b75081f0..9be2a75c 100644
--- a/lib/VNDB/Handler/Producers.pm
+++ b/lib/VNDB/Handler/Producers.pm
@@ -10,7 +10,6 @@ use VNDB::ExtLinks;
TUWF::register(
- qr{p([1-9]\d*)/rg} => \&rg,
qr{p([1-9]\d*)(?:\.([1-9]\d*))?} => \&page,
qr{p/add} => \&addform,
qr{p(?:([1-9]\d*)(?:\.([1-9]\d*))?/edit|/new)}
@@ -20,27 +19,6 @@ TUWF::register(
);
-sub rg {
- my($self, $pid) = @_;
-
- my $p = $self->dbProducerGet(id => $pid, what => 'relgraph')->[0];
- return $self->resNotFound if !$p->{id} || !$p->{rgraph};
-
- my $title = "Relation graph for $p->{name}";
- return if $self->htmlRGHeader($title, 'p', $p);
-
- $p->{svg} =~ s/id="node_p$pid"/id="graph_current"/;
-
- div class => 'mainbox';
- h1 $title;
- p class => 'center';
- lit $p->{svg};
- end;
- end;
- $self->htmlFooter;
-}
-
-
sub page {
my($self, $pid, $rev) = @_;
diff --git a/lib/VNWeb/Producers/Graph.pm b/lib/VNWeb/Producers/Graph.pm
new file mode 100644
index 00000000..96342d28
--- /dev/null
+++ b/lib/VNWeb/Producers/Graph.pm
@@ -0,0 +1,162 @@
+package VNWeb::Producers::Graph;
+
+use VNWeb::Prelude;
+use AnyEvent::Util;
+use TUWF::XML 'xml_escape';
+use Encode 'encode_utf8', 'decode_utf8';
+
+
+# Given a starting ID, an array of {id0,id1} relation hashes and a number of
+# nodes to be included, returns a hash of (id=>{id, distance, rels}) nodes.
+#
+# This is basically a breath-first search that prioritizes nodes with fewer
+# relations. Direct relations with the starting node are always included,
+# regardless of $num.
+sub gen_nodes {
+ my($id, $rel, $num) = @_;
+
+ my %rels;
+ push $rels{$_->{id0}}->@*, $_->{id1} for @$rel;
+
+ my %nodes;
+ my sub add {
+ my($n, $dist) = @_;
+ if(!$nodes{$n}) {
+ $nodes{$n} = { id => $n, rels => $rels{$n}, distance => $dist };
+ $num--;
+ }
+ }
+
+ my @q = ([$id,0]);
+ while(@q && ($num > 0 || $q[0][1] <= 1)) {
+ my($n, $dist) = shift(@q)->@*;
+ add $n, $dist++;
+ push @q, map [$_, $dist], sort { $rels{$a}->@* <=> $rels{$b}->@* } grep !$nodes{$_}, $rels{$n}->@*;
+ }
+
+ \%nodes;
+}
+
+
+sub dot2svg {
+ my($dot) = @_;
+
+ $dot = encode_utf8 $dot;
+ local $SIG{CHLD} = undef; # Fixed in TUWF 4d8a59cc1dfb5f919298ee495b8865f7872f6cbb
+ my $e = run_cmd([config->{graphviz_path},'-Tsvg'], '<', \$dot, '>', \my $out, '2>', \my $err)->recv;
+ warn "graphviz STDERR: $err\n" if chomp $err;
+ $e and die "Failed to run graphviz";
+
+ # - Remove <?xml> declaration and <!DOCTYPE> (not compatible with embedding in HTML5)
+ # - Remove comments (unused)
+ # - Remove <title> elements (unused)
+ # - Remove first <polygon> element (emulates a background color)
+ # - Replace stroke and fill attributes with classes (so that coloring is done in CSS)
+ # (I used to have an implementation based on XML::Parser, but regexes are so much faster...)
+ decode_utf8($out)
+ =~ s/<\?xml.+?\?>//r
+ =~ s/<!DOCTYPE[^>]*>//r
+ =~ s/<!--.*?-->//srg
+ =~ s/<title>.+?<\/title>//gr
+ =~ s/<polygon.+?\/>//r
+ =~ s/(?:stroke|fill)="([^"]+)"/$1 eq '#111111' ? 'class="border"' : $1 eq '#222222' ? 'class="nodebg"' : ''/egr;
+}
+
+
+sub gen_dot {
+ my($rel, $nodes, $params) = @_;
+
+ # Attempt to figure out a good 'rankdir' to minimize the width of the
+ # graph. Ideally we'd just generate two graphs and pick the least wide one,
+ # but that's way too slow. Graphviz tends to put adjacent nodes next to
+ # each other, so going for the LR (left-right) rank order tends to work
+ # better with large fan-out, while TB (top-bottom) often results in less
+ # wide graphs for large depths.
+ #my $max_distance = max map $_->{distance}, values %$nodes;
+ my $max_fanout = max map scalar grep($nodes->{$_}, $_->{rels}->@*), values %$nodes;
+ my $rankdir = $max_fanout > 6 ? 'LR' : 'TB';
+
+ my $dot =
+ qq|graph rgraph {\n|.
+ qq|\trankdir=$rankdir\n|.
+ qq|\tnodesep=0.1\n|.
+ qq|\tnode [ fontname = "Arial", shape = "plaintext", fontsize = 8, color = "#111111" ]\n|.
+ qq|\tedge [ labeldistance = 2.5, labelangle = -20, labeljust = 1, minlen = 2, dir = "both",|.
+ qq| fontname = "Arial", fontsize = 7, arrowsize = 0.7, color = "#111111" ]\n|;
+
+ for my $n (sort { $a->{id} <=> $b->{id} } values %$nodes) {
+ my $name = xml_escape shorten $n->{name}, 27;
+ my $tooltip = $n->{name} =~ s/\\/\\\\/rg =~ s/"/\\"/rg =~ s/&/&amp;/rg;
+ my $nodeid = $n->{distance} == 0 ? 'id = "graph_current", ' : '';
+ $dot .=
+ qq|\tn$n->{id} [ $nodeid URL = "/p$n->{id}", tooltip = "$tooltip", label=<|.
+ qq|<TABLE CELLSPACING="0" CELLPADDING="2" BORDER="0" CELLBORDER="1" BGCOLOR="#222222">|.
+ qq|<TR><TD COLSPAN="2" ALIGN="CENTER" CELLPADDING="3"><FONT POINT-SIZE="9"> $name </FONT></TD></TR>|.
+ qq|<TR><TD ALIGN="CENTER"> $LANGUAGE{$n->{lang}} </TD><TD ALIGN="CENTER"> $PRODUCER_TYPE{$n->{type}} </TD></TR>|.
+ qq|</TABLE>> ]\n|;
+
+ my $notshown = grep !$nodes->{$_}, $n->{rels}->@*;
+ $dot .=
+ qq|\tns$n->{id} [ URL = "/p$n->{id}/rg$params", label="$notshown more..." ]\n|.
+ qq|\tn$n->{id} -- ns$n->{id} [ dir = "forward", style = "dashed" ]\n|
+ if $notshown;
+ }
+
+ for (grep $_->{id0} < $_->{id1} && $nodes->{$_->{id0}} && $nodes->{$_->{id1}}, @$rel) {
+ my $lbl1 = $PRODUCER_RELATION{$_->{relation}}{txt};
+ my $lbl2 = $PRODUCER_RELATION{ $PRODUCER_RELATION{$_->{relation}}{reverse} }{txt};
+ $dot .= "\tn$_->{id0} -- n$_->{id1} [".($lbl1 eq $lbl2 ? qq{label="$lbl1"} : qq{headlabel="$lbl1", taillabel="$lbl2"})."]\n";
+ }
+
+ $dot .= "}\n";
+ $dot
+}
+
+
+TUWF::get qr{/$RE{pid}/rg}, sub {
+ my $id = tuwf->capture(1);
+ my $num = tuwf->validate(get => num => { uint => 1, onerror => 15 })->data;
+ my $p = tuwf->dbRowi('SELECT id, name, original, hidden AS entry_hidden, locked AS entry_locked FROM producers WHERE id =', \$id);
+
+ # Big list of { id0, id1, relation } hashes.
+ # Each relation is included twice, with id0 and id1 reversed.
+ my $rel = tuwf->dbAlli(q{
+ WITH RECURSIVE rel(id0, id1, relation) AS (
+ SELECT id, pid, relation FROM producers_relations WHERE id =}, \$id, q{
+ UNION
+ SELECT id, pid, pr.relation FROM producers_relations pr JOIN rel r ON pr.id = r.id1
+ ) SELECT * FROM rel ORDER BY id0
+ });
+ return tuwf->resNotFound if !@$rel;
+
+ # Fetch the nodes
+ my $nodes = gen_nodes $id, $rel, $num;
+ enrich_merge id => 'SELECT id, name, lang, type FROM producers WHERE id IN', values %$nodes;
+
+ my $total_nodes = keys { map +($_->{id0},1), @$rel }->%*;
+ my $visible_nodes = keys %$nodes;
+
+ framework_ title => "Relations for $p->{name}", type => 'p', dbobj => $p, tab => 'rg',
+ sub {
+ div_ class => 'mainbox', sub {
+ h1_ "Relations for $p->{name}";
+ p_ sub {
+ txt_ sprintf "Displaying %d out of %d related producers.", $visible_nodes, $total_nodes;
+ br_;
+ txt_ "Adjust graph size: ";
+ join_ ', ', sub {
+ if($_ == min $num, $total_nodes) {
+ txt_ $_ ;
+ } else {
+ a_ href => "/p$id/rg?num=$_", $_;
+ }
+ }, grep($_ < $total_nodes, 10, 15, 25, 50, 75, 100, 150, 250, 500, 750, 1000), $total_nodes;
+ txt_ '.';
+ } if $total_nodes > 10;
+ p_ class => 'center', sub { lit_ dot2svg gen_dot $rel, $nodes, $num == 15 ? '' : "?num=$num" };
+ debug_ +{ nodes => $nodes, rel => $rel };
+ }
+ };
+};
+
+1;