diff options
Diffstat (limited to 'util/bbcode-test.pl')
-rwxr-xr-x | util/bbcode-test.pl | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/util/bbcode-test.pl b/util/bbcode-test.pl new file mode 100755 index 00000000..058fa937 --- /dev/null +++ b/util/bbcode-test.pl @@ -0,0 +1,185 @@ +#!/usr/bin/perl + +# This is a test & benchmark script for VNDB::BBCode. +# Call without arguments to run the test, with any argument to run the benchmark. + +use strict; +use warnings; +use Cwd 'abs_path'; +use Test::More; +use Benchmark 'timethese'; + +our($ROOT, %S); +BEGIN { ($ROOT = abs_path $0) =~ s{/util/bbcode-test\.pl$}{}; } +use lib "$ROOT/lib"; +use VNDB::BBCode; + + +my @tests = ( + '', + '', + + '[From [url=http://www.dlSITE.com/eng/]DLsite English[/url]]', + '[From <a href="http://www.dlSITE.com/eng/" rel="nofollow">DLsite English</a>]', + + '[url=http://example.com/]some url[/url]', + '<a href="http://example.com/" rel="nofollow">some url</a>', + + '[quote]some quote[/quote]', + '<div class="quote">some quote</div>', + + "[code]some code\n\nalso newlines;[/code]", + '<pre>some code<br><br>also newlines;</pre>', + + '[spoiler]some spoiler[/spoiler]', + '<b class="spoiler">some spoiler</b>', + + "[raw][quote]not parsed\n[url=https://vndb.org/]valid url[/url]\n[url=asdf]invalid url[/url][/quote][/raw]", + "[quote]not parsed<br>[url=https://vndb.org/]valid url[/url]<br>[url=asdf]invalid url[/url][/quote]", + + '[quote]basic [spoiler]single[/spoiler]-line [spoiler][url=/g]tag[/url] nesting [raw](without [url=/v3333]special[/url] cases)[/raw][/spoiler][/quote]', + '<div class="quote">basic <b class="spoiler">single</b>-line <b class="spoiler"><a href="/g" rel="nofollow">tag</a> nesting (without [url=/v3333]special[/url] cases)</b></div>', + + "[quote]rmnewline after closing tag[/quote]\n", + '<div class="quote">rmnewline after closing tag</div>', + + '[url=/v19]some vndb url[/url]', + '<a href="/v19" rel="nofollow">some vndb url</a>', + + "quite\n\n\n\n\n\n\na\n\n\n\n\n lot of\n\n\n\nunneeded whitespace", + 'quite<br><br>a<br><br> lot of<br><br><br><br>unneeded whitespace', + + "[quote]\nsimple\nrmnewline\ntest\n[/quote]", + '<div class="quote">simple<br>rmnewline<br>test<br></div>', + + # the new implementation doesn't special-case [code], as the first newline shouldn't matter either way + "[quote]\n\nhello, rmnewline test[code]\n#!/bin/sh\n\nfunction random_username() {\n </dev/urandom tr -cd 'a-zA-Z0-9' | dd bs=1 count=16 2>/dev/null\n}\n[/code]\nsome text after the code tag\n[/quote]\n\n[spoiler]\nsome newlined spoiler\n[/spoiler]", + '<div class="quote"><br>hello, rmnewline test<pre>#!/bin/sh<br><br>function random_username() {<br> </dev/urandom tr -cd \'a-zA-Z0-9\' | dd bs=1 count=16 2>/dev/null<br>}<br></pre>some text after the code tag<br></div><br><b class="spoiler"><br>some newlined spoiler<br></b>', + + "[quote]\n[raw]\nrmnewline test with made-up elements\n[/raw]\nwelp\n[dumbtag]\nnone\n[/dumbtag]\n[/quote]", + '<div class="quote"><br>rmnewline test with made-up elements<br><br>welp<br>[dumbtag]<br>none<br>[/dumbtag]<br></div>', + + '[url=http://example.com/]markup in [raw][url][/raw][/url]', + '<a href="http://example.com/" rel="nofollow">markup in [url]</a>', + + '[url=http://192.168.1.1/some/path]ipv4 address in [url][/url]', + '<a href="http://192.168.1.1/some/path" rel="nofollow">ipv4 address in [url]</a>', + + 'http://192.168.1.1/some/path (literal ipv4 address)', + '<a href="http://192.168.1.1/some/path" rel="nofollow">link</a> (literal ipv4 address)', + + '[url=http://192.168.1.1:8080/some/path]ipv4 address (port included) in [url][/url]', + '<a href="http://192.168.1.1:8080/some/path" rel="nofollow">ipv4 address (port included) in [url]</a>', + + 'http://192.168.1.1:8080/some/path (literal ipv4 address, port included)', + '<a href="http://192.168.1.1:8080/some/path" rel="nofollow">link</a> (literal ipv4 address, port included)', + + '[Quote]non-lowercase tags [SpOILER]here[/sPOilER][/qUOTe]', + '<div class="quote">non-lowercase tags <b class="spoiler">here</b></div>', + + 'some text [spoiler]with (v17) tags[/spoiler] and internal ids such as s1', + 'some text <b class="spoiler">with (<a href="/v17">v17</a>) tags</b> and internal ids such as <a href="/s1">s1</a>', + + 'r12.1 v6.3 s1.2', + '<a href="/r12.1">r12.1</a> <a href="/v6.3">v6.3</a> <a href="/s1.2">s1.2</a>', + + 'v17 text dds16v21 more text1 v9', + '<a href="/v17">v17</a> text dds16v21 more text1 <a href="/v9">v9</a>', + + # Not sure what to do here + #'http://some[raw].pointlessly[/raw].unusual.domain/', + #'<a href="http://some.pointlessly.unusual.domain/" rel="nofollow">link</a>', + + #'[url=http://some[raw].pointlessly[/raw].unusual.domain/]hi[/url]', + #'<a href="http://some[raw].pointlessly[/raw].unusual.domain/" rel="nofollow">hi</a>', + + '<tag>html escapes (&)</tag>', + '<tag>html escapes (&)</tag>', + + '[spoiler]stray open tag', + '<b class="spoiler">stray open tag</b>', + + # TODO: This isn't ideal + '[quote][spoiler]stray open tag (nested)[/quote]', + '<div class="quote"><b class="spoiler">stray open tag (nested)[/quote]</b></div>', + + '[quote][spoiler]two stray open tags', + '<div class="quote"><b class="spoiler">two stray open tags</b></div>', + + "[url=https://cat.xyz/]that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination[/url]", + '<a href="https://cat.xyz/" rel="nofollow">that\'s [spoiler]some [quote]uncommon[/quote][/spoiler] combination</a>', + + # > I don't see anyone using IPv6 URLs anytime soon, so I'm not worried too either way. + #'[url=http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path]ipv6 address in [url][/url]', + #'<a href="http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path" rel="nofollow">ipv6 address in [url]</a>', + + #'http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path (literal ipv6 address)', + #'<a href="http://[fedc:ba98:7654:3210:fedc:ba98:7654:3210]/some/path" rel="nofollow">link</a> (literal ipv6 address)', + + # test shortening + [ "[url=https://cat.xyz/]that's [spoiler]some [quote]uncommon[/quote][/spoiler] combination[/url]", 10 ], + '<a href="https://cat.xyz/" rel="nofollow">that\'s </a>', + + [ "A https://blicky.net/ only takes 4 characters", 8 ], + 'A <a href="https://blicky.net/" rel="nofollow">link</a>', +); + + +# output should be the same as the input +my @invalid_syntax = ( + '[url="http://example.com/"]invalid argument to the "url" tag[/url]', + '[url=nicetext]simpler invalid param[/url]', + '[url]empty "url" tag[/url]', + '[tag]custom tag[/tag]', +); + + +# Chaining all the parse() raw arguments should generate the same string as the input +sub identity { + my $ret = ''; + VNDB::BBCode::parse $_[0], sub { + $ret .= $_[0]; + }; + $ret; +} + + +sub test { + push @tests, map +($_,$_), @invalid_syntax; + plan tests => scalar @tests; + + my @t = @tests; + while(@t) { + my $input = shift @t; + my $html = shift @t; + my @arg = ref $input ? @$input : ($input); + (my $msg = $arg[0]) =~ s/\n/\\n/g; + is identity($arg[0]), $arg[0], $msg; + is bb2html(@arg), $html, $msg; + } +} + + +# Performance comparison with old implementation +sub bench { + my $plain = "This isn't a terribly interesting [string]. "x1000; + my $short = "Nobody ev3r v10 uses v5 so s1 many [url=https://blicky.net/]x[raw]y[/raw][/url] tags. "; + my $heavy = $short x100; + timethese(0, { + short => sub { bb2html($short) }, + plain => sub { bb2html($plain) }, + heavy => sub { bb2html($heavy) }, + }); + # old: + # heavy: 3 wallclock secs ( 3.15 usr + 0.00 sys = 3.15 CPU) @ 357.46/s (n=1126) + # plain: 3 wallclock secs ( 3.20 usr + 0.00 sys = 3.20 CPU) @ 130.00/s (n=416) + # short: 3 wallclock secs ( 3.17 usr + 0.00 sys = 3.17 CPU) @ 31420.82/s (n=99604) + # new: + # heavy: 3 wallclock secs ( 3.23 usr + 0.00 sys = 3.23 CPU) @ 242.11/s (n=782) + # plain: 3 wallclock secs ( 3.12 usr + 0.00 sys = 3.12 CPU) @ 124.04/s (n=387) + # short: 3 wallclock secs ( 3.18 usr + 0.00 sys = 3.18 CPU) @ 21018.55/s (n=66839) + # That's a bit of a performance hit, but should still be fast enough. +} + +test if !@ARGV; +bench if @ARGV; |