From 11963a54f3e86812e9bd3f542da57d1163c9d5ff Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 6 Jan 2018 12:58:21 +0100 Subject: Add bb2text() and use it for release notes & open graph tags --- lib/VNDB/BBCode.pm | 25 ++++++++++++++++++++++- lib/VNDB/Func.pm | 2 +- lib/VNDB/Handler/Chars.pm | 2 +- lib/VNDB/Handler/Producers.pm | 2 +- lib/VNDB/Handler/Releases.pm | 2 +- lib/VNDB/Handler/Staff.pm | 2 +- lib/VNDB/Handler/VNPage.pm | 7 ++----- util/bbcode-test.pl | 47 +++++++++++++++++++++++++++++++++++-------- 8 files changed, 70 insertions(+), 19 deletions(-) diff --git a/lib/VNDB/BBCode.pm b/lib/VNDB/BBCode.pm index ffa14d98..95a90d16 100644 --- a/lib/VNDB/BBCode.pm +++ b/lib/VNDB/BBCode.pm @@ -5,7 +5,7 @@ use warnings; use Exporter 'import'; use TUWF::XML 'xml_escape'; -our @EXPORT = qw/bb2html/; +our @EXPORT = qw/bb2html bb2text/; # Supported BBCode: # [spoiler] .. [/spoiler] @@ -226,4 +226,27 @@ sub bb2html { $ret; } + +# Convert bbcode into plain text, stripping all tags and spoilers. [url] tags +# only display the title. +sub bb2text { + my $input = shift; + + my $inspoil = 0; + my $ret = ''; + parse $input, sub { + my($raw, $tag, @arg) = @_; + if($tag eq 'spoiler_start') { + $inspoil = 1; + } elsif($tag eq 'spoiler_end') { + $inspoil = 0; + } else { + $ret .= $raw if !$inspoil && $tag !~ /_(start|end)$/; + } + 1; + }; + $ret; +} + + 1; diff --git a/lib/VNDB/Func.pm b/lib/VNDB/Func.pm index e46df5e2..7aa3d5c1 100644 --- a/lib/VNDB/Func.pm +++ b/lib/VNDB/Func.pm @@ -9,7 +9,7 @@ use POSIX 'strftime', 'ceil', 'floor'; use JSON::XS; use VNDBUtil; use VNDB::BBCode; -our @EXPORT = (@VNDBUtil::EXPORT, 'bb2html', qw| +our @EXPORT = (@VNDBUtil::EXPORT, 'bb2html', 'bb2text', qw| clearfloat cssicon tagscore mt minage fil_parse fil_serialize parenttags childtags charspoil imgpath imgurl fmtvote fmtmedia fmtvnlen fmtage fmtdatestr fmtdate fmtuser fmtrating fmtspoil diff --git a/lib/VNDB/Handler/Chars.pm b/lib/VNDB/Handler/Chars.pm index d412aae9..9824a7d9 100644 --- a/lib/VNDB/Handler/Chars.pm +++ b/lib/VNDB/Handler/Chars.pm @@ -31,7 +31,7 @@ sub page { my $metadata = { 'og:title' => $r->{name}, - 'og:description' => $r->{desc}, + 'og:description' => bb2text $r->{desc}, 'og:image' => $r->{image} && imgurl(ch => $r->{image}), }; diff --git a/lib/VNDB/Handler/Producers.pm b/lib/VNDB/Handler/Producers.pm index 42c4e7f0..a4070467 100644 --- a/lib/VNDB/Handler/Producers.pm +++ b/lib/VNDB/Handler/Producers.pm @@ -52,7 +52,7 @@ sub page { my $metadata = { 'og:title' => $p->{name}, - 'og:description' => $p->{desc}, + 'og:description' => bb2text $p->{desc}, }; $self->htmlHeader(title => $p->{name}, noindex => $rev, metadata => $metadata); diff --git a/lib/VNDB/Handler/Releases.pm b/lib/VNDB/Handler/Releases.pm index 19fa0678..3181566e 100644 --- a/lib/VNDB/Handler/Releases.pm +++ b/lib/VNDB/Handler/Releases.pm @@ -30,7 +30,7 @@ sub page { my $metadata = { 'og:title' => $r->{title}, - 'og:description' => $r->{notes}, + 'og:description' => bb2text $r->{notes}, }; $self->htmlHeader(title => $r->{title}, noindex => $rev, metadata => $metadata); diff --git a/lib/VNDB/Handler/Staff.pm b/lib/VNDB/Handler/Staff.pm index ca2f9842..4d583b68 100644 --- a/lib/VNDB/Handler/Staff.pm +++ b/lib/VNDB/Handler/Staff.pm @@ -29,7 +29,7 @@ sub page { my $metadata = { 'og:title' => $s->{name}, - 'og:description' => $s->{desc}, + 'og:description' => bb2text $s->{desc}, }; $self->htmlHeader(title => $s->{name}, noindex => $rev, metadata => $metadata); diff --git a/lib/VNDB/Handler/VNPage.pm b/lib/VNDB/Handler/VNPage.pm index 2ebb42e6..0d22f256 100644 --- a/lib/VNDB/Handler/VNPage.pm +++ b/lib/VNDB/Handler/VNPage.pm @@ -344,7 +344,7 @@ sub page { my $metadata = { 'og:title' => $v->{title}, - 'og:description' => $v->{desc}, + 'og:description' => bb2text $v->{desc}, }; if($v->{image} && !$v->{img_nsfw}) { @@ -881,10 +881,7 @@ sub _release_icons { } # Notes column - # TODO: The notes text should to through a bb2html() to strip the tags. But - # showing HTML inside a 'title' attribute won't work, and bb2html() doesn't - # have a plain text output option. - _release_icon 'notes', $rel->{notes}, 'notes' if $rel->{notes}; + _release_icon 'notes', bb2text($rel->{notes}), 'notes' if $rel->{notes}; } diff --git a/util/bbcode-test.pl b/util/bbcode-test.pl index 058fa937..1b8650cf 100755 --- a/util/bbcode-test.pl +++ b/util/bbcode-test.pl @@ -12,79 +12,102 @@ use Benchmark 'timethese'; our($ROOT, %S); BEGIN { ($ROOT = abs_path $0) =~ s{/util/bbcode-test\.pl$}{}; } use lib "$ROOT/lib"; -use VNDB::BBCode; +use VNDB::BBCode qw/bb2html bb2text/; my @tests = ( '', '', + '', '[From [url=http://www.dlSITE.com/eng/]DLsite English[/url]]', '[From DLsite English]', + '[From DLsite English]', '[url=http://example.com/]some url[/url]', 'some url', + 'some url', '[quote]some quote[/quote]', '
some quote
', + 'some quote', "[code]some code\n\nalso newlines;[/code]", '
some code

also newlines;
', + "some code\n\nalso newlines;", '[spoiler]some spoiler[/spoiler]', 'some spoiler', + '', "[raw][quote]not parsed\n[url=https://vndb.org/]valid url[/url]\n[url=asdf]invalid url[/url][/quote][/raw]", "[quote]not parsed
[url=https://vndb.org/]valid url[/url]
[url=asdf]invalid url[/url][/quote]", + "[quote]not parsed\n[url=https://vndb.org/]valid url[/url]\n[url=asdf]invalid url[/url][/quote]", '[quote]basic [spoiler]single[/spoiler]-line [spoiler][url=/g]tag[/url] nesting [raw](without [url=/v3333]special[/url] cases)[/raw][/spoiler][/quote]', '
basic single-line tag nesting (without [url=/v3333]special[/url] cases)
', + 'basic -line ', "[quote]rmnewline after closing tag[/quote]\n", '
rmnewline after closing tag
', + "rmnewline after closing tag\n", '[url=/v19]some vndb url[/url]', 'some vndb url', + 'some vndb url', "quite\n\n\n\n\n\n\na\n\n\n\n\n lot of\n\n\n\nunneeded whitespace", 'quite

a

lot of



unneeded whitespace', + "quite\n\n\n\n\n\n\na\n\n\n\n\n lot of\n\n\n\nunneeded whitespace", "[quote]\nsimple\nrmnewline\ntest\n[/quote]", '
simple
rmnewline
test
', + "\nsimple\nrmnewline\ntest\n", # the new implementation doesn't special-case [code], as the first newline shouldn't matter either way "[quote]\n\nhello, rmnewline test[code]\n#!/bin/sh\n\nfunction random_username() {\n /dev/null\n}\n[/code]\nsome text after the code tag\n[/quote]\n\n[spoiler]\nsome newlined spoiler\n[/spoiler]", '

hello, rmnewline test
#!/bin/sh

function random_username() {
</dev/urandom tr -cd \'a-zA-Z0-9\' | dd bs=1 count=16 2>/dev/null
}
some text after the code tag


some newlined spoiler
', + "\n\nhello, rmnewline test\n#!/bin/sh\n\nfunction random_username() {\n /dev/null\n}\n\nsome text after the code tag\n\n\n", "[quote]\n[raw]\nrmnewline test with made-up elements\n[/raw]\nwelp\n[dumbtag]\nnone\n[/dumbtag]\n[/quote]", '

rmnewline test with made-up elements

welp
[dumbtag]
none
[/dumbtag]
', + "\n\nrmnewline test with made-up elements\n\nwelp\n[dumbtag]\nnone\n[/dumbtag]\n", '[url=http://example.com/]markup in [raw][url][/raw][/url]', 'markup in [url]', + "markup in [url]", '[url=http://192.168.1.1/some/path]ipv4 address in [url][/url]', 'ipv4 address in [url]', + 'ipv4 address in [url]', 'http://192.168.1.1/some/path (literal ipv4 address)', 'link (literal ipv4 address)', + 'http://192.168.1.1/some/path (literal ipv4 address)', '[url=http://192.168.1.1:8080/some/path]ipv4 address (port included) in [url][/url]', 'ipv4 address (port included) in [url]', + 'ipv4 address (port included) in [url]', 'http://192.168.1.1:8080/some/path (literal ipv4 address, port included)', 'link (literal ipv4 address, port included)', + 'http://192.168.1.1:8080/some/path (literal ipv4 address, port included)', '[Quote]non-lowercase tags [SpOILER]here[/sPOilER][/qUOTe]', '
non-lowercase tags here
', + 'non-lowercase tags ', 'some text [spoiler]with (v17) tags[/spoiler] and internal ids such as s1', 'some text with (v17) tags and internal ids such as s1', + 'some text and internal ids such as s1', 'r12.1 v6.3 s1.2', 'r12.1 v6.3 s1.2', + 'r12.1 v6.3 s1.2', 'v17 text dds16v21 more text1 v9', 'v17 text dds16v21 more text1 v9', + 'v17 text dds16v21 more text1 v9', # Not sure what to do here #'http://some[raw].pointlessly[/raw].unusual.domain/', @@ -95,19 +118,24 @@ my @tests = ( 'html escapes (&)', '<tag>html escapes (&)</tag>', + 'html escapes (&)', '[spoiler]stray open tag', 'stray open tag', + '', # TODO: This isn't ideal '[quote][spoiler]stray open tag (nested)[/quote]', '
stray open tag (nested)[/quote]
', + '', '[quote][spoiler]two stray open tags', '
two stray open tags
', + '', "[url=https://cat.xyz/]that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination[/url]", 'that\'s [spoiler]some [quote]uncommon[/quote][/spoiler] combination', + "that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination", # > I don't see anyone using IPv6 URLs anytime soon, so I'm not worried too either way. #'[url=http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path]ipv6 address in [url][/url]', @@ -119,9 +147,11 @@ my @tests = ( # test shortening [ "[url=https://cat.xyz/]that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination[/url]", 10 ], 'that\'s ', + "that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination", [ "A https://blicky.net/ only takes 4 characters", 8 ], 'A link', + "A https://blicky.net/ only takes 4 characters", ); @@ -145,17 +175,18 @@ sub identity { sub test { - push @tests, map +($_,$_), @invalid_syntax; + push @tests, map +($_,$_,$_), @invalid_syntax; plan tests => scalar @tests; - my @t = @tests; - while(@t) { - my $input = shift @t; - my $html = shift @t; + while(@tests) { + my $input = shift @tests; + my $html = shift @tests; + my $plain = shift @tests; my @arg = ref $input ? @$input : ($input); (my $msg = $arg[0]) =~ s/\n/\\n/g; - is identity($arg[0]), $arg[0], $msg; - is bb2html(@arg), $html, $msg; + is identity($arg[0]), $arg[0], "id: $msg"; + is bb2html(@arg), $html, "html: $msg"; + is bb2text($arg[0]), $plain, "plain: $msg"; } } -- cgit v1.2.3