#!/usr/bin/perl # This is a test & benchmark script for VNDB::BBCode. # Call without arguments to run the test, with any argument to run the benchmark. use strict; use warnings; use Cwd 'abs_path'; use Test::More; use Benchmark 'timethese'; our($ROOT, %S); BEGIN { ($ROOT = abs_path $0) =~ s{/util/bbcode-test\.pl$}{}; } use lib "$ROOT/lib"; use VNDB::BBCode qw/bb2html bb2text/; my @tests = ( '', '', '', '[From [url=http://www.dlSITE.com/eng/]DLsite English[/url]]', '[From DLsite English]', '[From DLsite English]', '[url=http://example.com/]some url[/url]', 'some url', 'some url', '[quote]some quote[/quote]', '
some quote
', 'some quote', "[code]some code\n\nalso newlines;[/code]", '
some code

also newlines;
', "some code\n\nalso newlines;", '[spoiler]some spoiler[/spoiler]', 'some spoiler', '', "[raw][quote]not parsed\n[url=https://vndb.org/]valid url[/url]\n[url=asdf]invalid url[/url][/quote][/raw]", "[quote]not parsed
[url=https://vndb.org/]valid url[/url]
[url=asdf]invalid url[/url][/quote]", "[quote]not parsed\n[url=https://vndb.org/]valid url[/url]\n[url=asdf]invalid url[/url][/quote]", '[quote]basic [spoiler]single[/spoiler]-line [spoiler][url=/g]tag[/url] nesting [raw](without [url=/v3333]special[/url] cases)[/raw][/spoiler][/quote]', '
basic single-line tag nesting (without [url=/v3333]special[/url] cases)
', 'basic -line ', "[quote]rmnewline after closing tag[/quote]\n", '
rmnewline after closing tag
', "rmnewline after closing tag\n", '[url=/v19]some vndb url[/url]', 'some vndb url', 'some vndb url', "quite\n\n\n\n\n\n\na\n\n\n\n\n lot of\n\n\n\nunneeded whitespace", 'quite

a

lot of



unneeded whitespace', "quite\n\n\n\n\n\n\na\n\n\n\n\n lot of\n\n\n\nunneeded whitespace", "[quote]\nsimple\nrmnewline\ntest\n[/quote]", '
simple
rmnewline
test
', "\nsimple\nrmnewline\ntest\n", # the new implementation doesn't special-case [code], as the first newline shouldn't matter either way "[quote]\n\nhello, rmnewline test[code]\n#!/bin/sh\n\nfunction random_username() {\n /dev/null\n}\n[/code]\nsome text after the code tag\n[/quote]\n\n[spoiler]\nsome newlined spoiler\n[/spoiler]", '

hello, rmnewline test
#!/bin/sh

function random_username() {
</dev/urandom tr -cd \'a-zA-Z0-9\' | dd bs=1 count=16 2>/dev/null
}
some text after the code tag


some newlined spoiler
', "\n\nhello, rmnewline test\n#!/bin/sh\n\nfunction random_username() {\n /dev/null\n}\n\nsome text after the code tag\n\n\n", "[quote]\n[raw]\nrmnewline test with made-up elements\n[/raw]\nwelp\n[dumbtag]\nnone\n[/dumbtag]\n[/quote]", '

rmnewline test with made-up elements

welp
[dumbtag]
none
[/dumbtag]
', "\n\nrmnewline test with made-up elements\n\nwelp\n[dumbtag]\nnone\n[/dumbtag]\n", '[url=http://example.com/]markup in [raw][url][/raw][/url]', 'markup in [url]', "markup in [url]", '[url=http://192.168.1.1/some/path]ipv4 address in [url][/url]', 'ipv4 address in [url]', 'ipv4 address in [url]', 'http://192.168.1.1/some/path (literal ipv4 address)', 'link (literal ipv4 address)', 'http://192.168.1.1/some/path (literal ipv4 address)', '[url=http://192.168.1.1:8080/some/path]ipv4 address (port included) in [url][/url]', 'ipv4 address (port included) in [url]', 'ipv4 address (port included) in [url]', 'http://192.168.1.1:8080/some/path (literal ipv4 address, port included)', 'link (literal ipv4 address, port included)', 'http://192.168.1.1:8080/some/path (literal ipv4 address, port included)', '[Quote]non-lowercase tags [SpOILER]here[/sPOilER][/qUOTe]', '
non-lowercase tags here
', 'non-lowercase tags ', 'some text [spoiler]with (v17) tags[/spoiler] and internal ids such as s1', 'some text with (v17) tags and internal ids such as s1', 'some text and internal ids such as s1', 'r12.1 v6.3 s1.2', 'r12.1 v6.3 s1.2', 'r12.1 v6.3 s1.2', 'd3 d1.3 d2#4 d5#6.7', 'd3 d1.3 d2#4 d5#6.7', 'd3 d1.3 d2#4 d5#6.7', 'v17 text dds16v21 more text1 v9', 'v17 text dds16v21 more text1 v9', 'v17 text dds16v21 more text1 v9', # https://vndb.org/t2520.233 '[From[url=http://densetsu.com/display.php?id=468&style=alphabetical] Anime Densetsu[/url]]', '[From Anime Densetsu]', '[From Anime Densetsu]', # Not sure what to do here #'http://some[raw].pointlessly[/raw].unusual.domain/', #'link', #'[url=http://some[raw].pointlessly[/raw].unusual.domain/]hi[/url]', #'hi', 'html escapes (&)', '<tag>html escapes (&)</tag>', 'html escapes (&)', '[spoiler]stray open tag', 'stray open tag', '', # TODO: This isn't ideal '[quote][spoiler]stray open tag (nested)[/quote]', '
stray open tag (nested)[/quote]
', '', '[quote][spoiler]two stray open tags', '
two stray open tags
', '', "[url=https://cat.xyz/]that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination[/url]", 'that\'s [spoiler]some [quote]uncommon[/quote][/spoiler] combination', "that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination", # > I don't see anyone using IPv6 URLs anytime soon, so I'm not worried too either way. #'[url=http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path]ipv6 address in [url][/url]', #'ipv6 address in [url]', #'http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path (literal ipv6 address)', #'link (literal ipv6 address)', # test shortening [ "[url=https://cat.xyz/]that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination[/url]", 10 ], 'that\'s ', "that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination", [ "A https://blicky.net/ only takes 4 characters", 8 ], 'A link', "A https://blicky.net/ only takes 4 characters", ); # output should be the same as the input my @invalid_syntax = ( '[url="http://example.com/"]invalid argument to the "url" tag[/url]', '[url=nicetext]simpler invalid param[/url]', '[url]empty "url" tag[/url]', '[tag]custom tag[/tag]', # https://vndb.org/t2520.231 'pov1', ); # Chaining all the parse() raw arguments should generate the same string as the input sub identity { my $ret = ''; VNDB::BBCode::parse $_[0], sub { $ret .= $_[0]; }; $ret; } sub test { push @tests, map +($_,$_,$_), @invalid_syntax; plan tests => scalar @tests; while(@tests) { my $input = shift @tests; my $html = shift @tests; my $plain = shift @tests; my @arg = ref $input ? @$input : ($input); (my $msg = $arg[0]) =~ s/\n/\\n/g; is identity($arg[0]), $arg[0], "id: $msg"; is bb2html(@arg), $html, "html: $msg"; is bb2text($arg[0]), $plain, "plain: $msg"; } } # Performance comparison with old implementation sub bench { my $plain = "This isn't a terribly interesting [string]. "x1000; my $short = "Nobody ev3r v10 uses v5 so s1 many [url=https://blicky.net/]x[raw]y[/raw][/url] tags. "; my $heavy = $short x100; timethese(0, { short => sub { bb2html($short) }, plain => sub { bb2html($plain) }, heavy => sub { bb2html($heavy) }, }); # old: # heavy: 3 wallclock secs ( 3.15 usr + 0.00 sys = 3.15 CPU) @ 357.46/s (n=1126) # plain: 3 wallclock secs ( 3.20 usr + 0.00 sys = 3.20 CPU) @ 130.00/s (n=416) # short: 3 wallclock secs ( 3.17 usr + 0.00 sys = 3.17 CPU) @ 31420.82/s (n=99604) # new: # heavy: 3 wallclock secs ( 3.23 usr + 0.00 sys = 3.23 CPU) @ 242.11/s (n=782) # plain: 3 wallclock secs ( 3.12 usr + 0.00 sys = 3.12 CPU) @ 124.04/s (n=387) # short: 3 wallclock secs ( 3.18 usr + 0.00 sys = 3.18 CPU) @ 21018.55/s (n=66839) # That's a bit of a performance hit, but should still be fast enough. } test if !@ARGV; bench if @ARGV;