summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2018-01-06 12:29:45 +0100
committerYorhel <git@yorhel.nl>2018-01-06 12:29:45 +0100
commit000cb5720d2db0a744797bc0b9c30df70efd3fb6 (patch)
tree0f8b8dcf6b22804d7f17a791cc080a8fe6d1eab6 /util
parent55a153941cd84de61fa70d73cde527c3aff5a63c (diff)
Rewrite bb2html() to be more flexible
This is based on the API that I described in https://vndb.org/t5564.12

It's mostly bug-compatible with the old bb2html(), main differences:
- <br /> -> <br> for no reason
- Doesn't sporadically add a wrong </div>
- $rmwhitespace now also after [/code]

Most of the test cases were contributed by flan <flan@flande.re>
Diffstat (limited to 'util')
-rwxr-xr-xutil/bbcode-test.pl185
1 files changed, 185 insertions, 0 deletions
diff --git a/util/bbcode-test.pl b/util/bbcode-test.pl
new file mode 100755
index 00000000..058fa937
--- /dev/null
+++ b/util/bbcode-test.pl
@@ -0,0 +1,185 @@
+#!/usr/bin/perl
+
+# This is a test & benchmark script for VNDB::BBCode.
+# Call without arguments to run the test, with any argument to run the benchmark.
+
+use strict;
+use warnings;
+use Cwd 'abs_path';
+use Test::More;
+use Benchmark 'timethese';
+
+# $ROOT is the absolute path of this script with the trailing
+# "/util/bbcode-test.pl" removed, i.e. the directory that contains util/.
+# It is assigned inside BEGIN so that the value is already available when the
+# "use lib" line below is processed at compile time.
+# NOTE(review): %S is not referenced anywhere else in this script; presumably
+# the VNDB modules expect it to be declared - confirm before removing.
+our($ROOT, %S);
+BEGIN { ($ROOT = abs_path $0) =~ s{/util/bbcode-test\.pl$}{}; }
+use lib "$ROOT/lib";
+use VNDB::BBCode;
+
+
+# Test cases as a flat list of (input, expected bb2html() output) pairs.
+# An input is either a plain string or an array reference; the elements of an
+# array reference are passed to bb2html() as its argument list, and its first
+# element is the string that gets parsed.
+my @tests = (
+ '',
+ '',
+
+ '[From [url=http://www.dlSITE.com/eng/]DLsite English[/url]]',
+ '[From <a href="http://www.dlSITE.com/eng/" rel="nofollow">DLsite English</a>]',
+
+ '[url=http://example.com/]some url[/url]',
+ '<a href="http://example.com/" rel="nofollow">some url</a>',
+
+ '[quote]some quote[/quote]',
+ '<div class="quote">some quote</div>',
+
+ "[code]some code\n\nalso newlines;[/code]",
+ '<pre>some code<br><br>also newlines;</pre>',
+
+ '[spoiler]some spoiler[/spoiler]',
+ '<b class="spoiler">some spoiler</b>',
+
+ "[raw][quote]not parsed\n[url=https://vndb.org/]valid url[/url]\n[url=asdf]invalid url[/url][/quote][/raw]",
+ "[quote]not parsed<br>[url=https://vndb.org/]valid url[/url]<br>[url=asdf]invalid url[/url][/quote]",
+
+ '[quote]basic [spoiler]single[/spoiler]-line [spoiler][url=/g]tag[/url] nesting [raw](without [url=/v3333]special[/url] cases)[/raw][/spoiler][/quote]',
+ '<div class="quote">basic <b class="spoiler">single</b>-line <b class="spoiler"><a href="/g" rel="nofollow">tag</a> nesting (without [url=/v3333]special[/url] cases)</b></div>',
+
+ "[quote]rmnewline after closing tag[/quote]\n",
+ '<div class="quote">rmnewline after closing tag</div>',
+
+ '[url=/v19]some vndb url[/url]',
+ '<a href="/v19" rel="nofollow">some vndb url</a>',
+
+ "quite\n\n\n\n\n\n\na\n\n\n\n\n lot of\n\n\n\nunneeded whitespace",
+ 'quite<br><br>a<br><br> lot of<br><br><br><br>unneeded whitespace',
+
+ "[quote]\nsimple\nrmnewline\ntest\n[/quote]",
+ '<div class="quote">simple<br>rmnewline<br>test<br></div>',
+
+ # the new implementation doesn't special-case [code], as the first newline shouldn't matter either way
+ "[quote]\n\nhello, rmnewline test[code]\n#!/bin/sh\n\nfunction random_username() {\n </dev/urandom tr -cd 'a-zA-Z0-9' | dd bs=1 count=16 2>/dev/null\n}\n[/code]\nsome text after the code tag\n[/quote]\n\n[spoiler]\nsome newlined spoiler\n[/spoiler]",
+ '<div class="quote"><br>hello, rmnewline test<pre>#!/bin/sh<br><br>function random_username() {<br> &lt;/dev/urandom tr -cd \'a-zA-Z0-9\' | dd bs=1 count=16 2&gt;/dev/null<br>}<br></pre>some text after the code tag<br></div><br><b class="spoiler"><br>some newlined spoiler<br></b>',
+
+ "[quote]\n[raw]\nrmnewline test with made-up elements\n[/raw]\nwelp\n[dumbtag]\nnone\n[/dumbtag]\n[/quote]",
+ '<div class="quote"><br>rmnewline test with made-up elements<br><br>welp<br>[dumbtag]<br>none<br>[/dumbtag]<br></div>',
+
+ '[url=http://example.com/]markup in [raw][url][/raw][/url]',
+ '<a href="http://example.com/" rel="nofollow">markup in [url]</a>',
+
+ '[url=http://192.168.1.1/some/path]ipv4 address in [url][/url]',
+ '<a href="http://192.168.1.1/some/path" rel="nofollow">ipv4 address in [url]</a>',
+
+ 'http://192.168.1.1/some/path (literal ipv4 address)',
+ '<a href="http://192.168.1.1/some/path" rel="nofollow">link</a> (literal ipv4 address)',
+
+ '[url=http://192.168.1.1:8080/some/path]ipv4 address (port included) in [url][/url]',
+ '<a href="http://192.168.1.1:8080/some/path" rel="nofollow">ipv4 address (port included) in [url]</a>',
+
+ 'http://192.168.1.1:8080/some/path (literal ipv4 address, port included)',
+ '<a href="http://192.168.1.1:8080/some/path" rel="nofollow">link</a> (literal ipv4 address, port included)',
+
+ '[Quote]non-lowercase tags [SpOILER]here[/sPOilER][/qUOTe]',
+ '<div class="quote">non-lowercase tags <b class="spoiler">here</b></div>',
+
+ 'some text [spoiler]with (v17) tags[/spoiler] and internal ids such as s1',
+ 'some text <b class="spoiler">with (<a href="/v17">v17</a>) tags</b> and internal ids such as <a href="/s1">s1</a>',
+
+ 'r12.1 v6.3 s1.2',
+ '<a href="/r12.1">r12.1</a> <a href="/v6.3">v6.3</a> <a href="/s1.2">s1.2</a>',
+
+ 'v17 text dds16v21 more text1 v9',
+ '<a href="/v17">v17</a> text dds16v21 more text1 <a href="/v9">v9</a>',
+
+ # Not sure what to do here ([raw] inside a URL), so these cases are disabled
+ #'http://some[raw].pointlessly[/raw].unusual.domain/',
+ #'<a href="http://some.pointlessly.unusual.domain/" rel="nofollow">link</a>',
+
+ #'[url=http://some[raw].pointlessly[/raw].unusual.domain/]hi[/url]',
+ #'<a href="http://some[raw].pointlessly[/raw].unusual.domain/" rel="nofollow">hi</a>',
+
+ '<tag>html escapes (&)</tag>',
+ '&lt;tag&gt;html escapes (&amp;)&lt;/tag&gt;',
+
+ '[spoiler]stray open tag',
+ '<b class="spoiler">stray open tag</b>',
+
+ # TODO: This isn't ideal: the mismatched [/quote] ends up as literal text
+ # inside the spoiler
+ '[quote][spoiler]stray open tag (nested)[/quote]',
+ '<div class="quote"><b class="spoiler">stray open tag (nested)[/quote]</b></div>',
+
+ '[quote][spoiler]two stray open tags',
+ '<div class="quote"><b class="spoiler">two stray open tags</b></div>',
+
+ "[url=https://cat.xyz/]that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination[/url]",
+ '<a href="https://cat.xyz/" rel="nofollow">that\'s [spoiler]some [quote]uncommon[/quote][/spoiler] combination</a>',
+
+ # IPv6 cases are disabled. Rationale given at the time:
+ # > I don't see anyone using IPv6 URLs anytime soon, so I'm not too worried either way.
+ #'[url=http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path]ipv6 address in [url][/url]',
+ #'<a href="http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path" rel="nofollow">ipv6 address in [url]</a>',
+
+ #'http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path (literal ipv6 address)',
+ #'<a href="http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path" rel="nofollow">link</a> (literal ipv6 address)',
+
+ # test shortening: array-ref inputs, the second element is handed to
+ # bb2html() as an extra argument (presumably a length limit - TODO confirm)
+ [ "[url=https://cat.xyz/]that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination[/url]", 10 ],
+ '<a href="https://cat.xyz/" rel="nofollow">that\'s </a>',
+
+ [ "A https://blicky.net/ only takes 4 characters", 8 ],
+ 'A <a href="https://blicky.net/" rel="nofollow">link</a>',
+);
+
+
+# Inputs for which the bb2html() output should be the same as the input.
+# test() checks every entry with the entry itself as the expected output.
+my @invalid_syntax = (
+ '[url="http://example.com/"]invalid argument to the "url" tag[/url]',
+ '[url=nicetext]simpler invalid param[/url]',
+ '[url]empty "url" tag[/url]',
+ '[tag]custom tag[/tag]',
+);
+
+
+# Round-trip helper: hands the given string to VNDB::BBCode::parse() and
+# concatenates the first argument of every callback invocation. The tests
+# expect the result to be identical to the input.
+# NOTE(review): the callback returns the accumulated string, which is false
+# while it is '' or '0'. If parse() gives meaning to the callback's return
+# value this matters - confirm against VNDB::BBCode.
+sub identity {
+  my $joined = '';
+  my $collect = sub { $joined .= $_[0] };
+  VNDB::BBCode::parse($_[0], $collect);
+  return $joined;
+}
+
+
+# Runs all test cases and reports them through Test::More.
+# Each case produces two assertions:
+#  - identity(): the raw chunks reported by the parser add up to the input;
+#  - bb2html(): the generated HTML equals the expected string.
+# Entries of @invalid_syntax are checked with the input itself as the
+# expected HTML.
+sub test {
+  # Assemble the cases in a local list rather than appending to @tests, so
+  # the file-level fixture is left untouched.
+  my @cases = (@tests, map +($_,$_), @invalid_syntax);
+
+  # @cases holds two elements per case and every case runs two assertions, so
+  # the element count is also the assertion count.
+  plan tests => scalar @cases;
+
+  while(my($input, $html) = splice @cases, 0, 2) {
+    # An array reference carries the full bb2html() argument list; the string
+    # to parse is always its first element.
+    my @arg = ref $input ? @$input : ($input);
+
+    # Keep test names on a single line by showing newlines as a literal \n.
+    (my $msg = $arg[0]) =~ s/\n/\\n/g;
+
+    # Prefix the names so that the two assertions of one case can be told
+    # apart in the TAP output.
+    is identity($arg[0]), $arg[0], "identity: $msg";
+    is bb2html(@arg), $html, "bb2html: $msg";
+  }
+}
+
+
+# Performance comparison with old implementation.
+# Times bb2html() on three inputs: a short tagged sentence ("short"), that
+# sentence repeated 100 times ("heavy") and another sentence repeated 1000
+# times ("plain").
+#
+# Reference results:
+# old:
+# heavy: 3 wallclock secs ( 3.15 usr + 0.00 sys = 3.15 CPU) @ 357.46/s (n=1126)
+# plain: 3 wallclock secs ( 3.20 usr + 0.00 sys = 3.20 CPU) @ 130.00/s (n=416)
+# short: 3 wallclock secs ( 3.17 usr + 0.00 sys = 3.17 CPU) @ 31420.82/s (n=99604)
+# new:
+# heavy: 3 wallclock secs ( 3.23 usr + 0.00 sys = 3.23 CPU) @ 242.11/s (n=782)
+# plain: 3 wallclock secs ( 3.12 usr + 0.00 sys = 3.12 CPU) @ 124.04/s (n=387)
+# short: 3 wallclock secs ( 3.18 usr + 0.00 sys = 3.18 CPU) @ 21018.55/s (n=66839)
+# That's a bit of a performance hit, but should still be fast enough.
+sub bench {
+  my %input = (
+    plain => "This isn't a terribly interesting [string]. " x 1000,
+    short => "Nobody ev3r v10 uses v5 so s1 many [url=https://blicky.net/]x[raw]y[/raw][/url] tags. ",
+  );
+  $input{heavy} = $input{short} x 100;
+
+  # One closure per input, each bound to its own copy of the text.
+  my %job = map {
+    my $text = $input{$_};
+    ($_ => sub { bb2html($text) })
+  } keys %input;
+
+  # A count of 0 lets Benchmark pick the iteration count from its default
+  # minimum CPU time instead of running a fixed number of iterations.
+  timethese(0, \%job);
+}
+
+# Entry point: any command-line argument selects the benchmark, none runs the tests.
+if(@ARGV) {
+  bench();
+} else {
+  test();
+}