summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yorhel <git@yorhel.nl>, 2010-11-26 22:00:41 +0100
committer: Yorhel <git@yorhel.nl>, 2010-11-26 22:09:40 +0100
commit: 5a74f0fe0564807657ced8e9773075c2391c5cce (patch)
tree: 9d42be7ab9460afc3d861917031697bc5b1b74a5
parent: ad17aecae0c2c947ab33edc4e616b120f8761042 (diff)
VNDBUtil: Partly rewrote bb2html() to be faster and better
It does not use split() anymore; the input string is parsed in a single pass using a global regex. It's now a lot faster on larger input: the page generation time of /t937 went down from ~350ms to ~55ms (on the beta; the production server is slower).

Also made several tiny improvements while I was at it:
- multiple successive newlines aren't removed within [code]
- truncating a message with $maxlength also removes trailing spaces and punctuation
- multiple successive spaces are removed outside of [code] (and thus don't count towards the length of the message)

The function should be mostly equivalent with regard to all other things, ignoring a few minor edge cases that weren't documented in the first place.

The URL regex (and the idea of a global regex) came from bpaste: http://g.blicky.net/bpaste.git/commit/?id=ac7b16d0ec0f195d00a0a79698f67c3010e8cf7d
-rw-r--r-- ChangeLog 3
-rw-r--r-- lib/VNDBUtil.pm 123
2 files changed, 73 insertions, 53 deletions
diff --git a/ChangeLog b/ChangeLog
index 31a0a9e7..b0c16335 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,8 +6,9 @@
- JS: Reverted to the old selection box date selector
- JS: Split script.js into a separate file for each language
- Improved performance of update_vnpopularity() on PostgreSQL 9.0
+ - Faster and improved bb2html()
- Added WHEN clause to all SQL TRIGGERs for which it was useful
- (this *requires* PostgreSQL 9.0 or up!)
+ (this *requires* PostgreSQL 9.0 or up!)
- Added ON DELETE clause to all foreign keys referencing users (id)
- Use word-level (instead of character-level) diff for large fields
diff --git a/lib/VNDBUtil.pm b/lib/VNDBUtil.pm
index f23811a9..82363bcf 100644
--- a/lib/VNDBUtil.pm
+++ b/lib/VNDBUtil.pm
@@ -29,95 +29,114 @@ sub bb2html {
my $raw = shift;
my $maxlength = shift;
$raw =~ s/\r//g;
- $raw =~ s/\n{5,}/\n\n/g;
return '' if !$raw && $raw ne "0";
- my($result, $length, $rmnewline, @open) = ('', 0, 0, 'first');
+ my($result, $last, $length, $rmnewline, @open) = ('', 0, 0, 0, 'first');
+ # escapes, returns string, and takes care of $length and $maxlength; also
+ # takes care to remove newlines and double spaces when necessary
my $e = sub {
local $_ = shift;
+ s/^\n// if $rmnewline && $rmnewline--;
+ s/\n{5,}/\n\n/g if $open[$#open] ne 'code';
+ s/ +/ /g if $open[$#open] ne 'code';
+ $length += length $_;
+ if($maxlength && $length > $maxlength) {
+ $_ = substr($_, 0, $maxlength-$length);
+ s/[ \.,:;]+[^ \.,:;]*$//; # cleanly cut off on word boundary
+ }
s/&/&amp;/g;
s/>/&gt;/g;
s/</&lt;/g;
s/\n/<br \/>/g if !$maxlength;
- s/\n/ /g if $maxlength;
+ s/\n/ /g if $maxlength;
return $_;
};
- for (split /(\s|\n|\[[^\]]+\])/, $raw) {
- next if !defined $_;
- next if $_ eq '';
+ while($raw =~ m{(
+ ([tdvpr][1-9][0-9]*\.[1-9][0-9]*) | # 2. exid
+ ([tdvprug][1-9][0-9]*) | # 3. id
+ (\[[^\s\]]+\]) | # 4. tag
+ ((?:https?|ftp)://[^><"\n\s\]\[]+[\d\w=/-]) # 5. url
+ )}xg) {
+ my($match, $exid, $id, $tag, $url) = ($1, $2, $3, $4, $5);
- # (note to self: stop using unreadable hacks like these!)
- $rmnewline-- && $_ eq "\n" && next if $rmnewline;
+ # add string before the match
+ $result .= $e->(substr $raw, $last, (pos($raw)-length($match))-$last);
+ last if $maxlength && $length > $maxlength;
+ $last = pos $raw;
- my $lit = $_;
if($open[$#open] ne 'raw' && $open[$#open] ne 'code') {
- if (lc$_ eq '[raw]') { push @open, 'raw'; next }
- elsif (lc$_ eq '[spoiler]') { push @open, 'spoiler'; $result .= '<b class="spoiler">'; next }
- elsif (lc$_ eq '[quote]') {
- push @open, 'quote';
- $result .= '<div class="quote">' if !$maxlength;
- $rmnewline = 1;
- next
- } elsif (lc$_ eq '[code]') {
- push @open, 'code';
- $result .= '<pre>' if !$maxlength;
- $rmnewline = 1;
- next
- } elsif (lc$_ eq '[/spoiler]') {
- if($open[$#open] eq 'spoiler') {
+ # handle tags
+ if($tag) {
+ $tag = lc $tag;
+ if($tag eq '[raw]') {
+ push @open, 'raw'
+ } elsif($tag eq '[spoiler]') {
+ push @open, 'spoiler';
+ $result .= '<b class="spoiler">'
+ } elsif($tag eq '[quote]') {
+ push @open, 'quote';
+ $result .= '<div class="quote">' if !$maxlength;
+ $rmnewline = 1;
+ } elsif($tag eq '[code]') {
+ push @open, 'code';
+ $result .= '<pre>' if !$maxlength;
+ $rmnewline = 1;
+ } elsif($tag eq '[/spoiler]' && $open[$#open] eq 'spoiler') {
$result .= '</b>';
pop @open;
- }
- next;
- } elsif (lc$_ eq '[/quote]') {
- if($open[$#open] eq 'quote') {
+ } elsif($tag eq '[/quote]' && $open[$#open] eq 'quote') {
$result .= '</div>' if !$maxlength;
$rmnewline = 1;
- pop @open;
- }
- next;
- } elsif(lc$_ eq '[/url]') {
- if($open[$#open] eq 'url') {
+ } elsif($tag eq '[/url]' && $open[$#open] eq 'url') {
$result .= '</a>';
pop @open;
+ } elsif($tag =~ s{\[url=((https?://|/)[^\]>]+)\]}{<a href="$1" rel="nofollow">}i) {
+ $result .= $tag;
+ push @open, 'url';
}
next;
- } elsif(s{\[url=((https?://|/)[^\]>]+)\]}{<a href="$1" rel="nofollow">}i) {
- $result .= $_;
- push @open, 'url';
- next;
- } elsif(!grep(/url/, @open) &&
- s{(.*)(http|https)://(.+[\d\w=/-])(.*)}
- {$e->($1).qq|<a href="$2://|.$e->($3, 1).'" rel="nofollow">'.$e->('link').'</a>'.$e->($4)}e) {
+ }
+ # handle URLs
+ if($url && !grep(/url/, @open)) {
$length += 4;
last if $maxlength && $length > $maxlength;
- $result .= $_;
- next;
- } elsif(!grep(/url/, @open) && (
- s{^(.*[^\w]|)([tdvpr][1-9][0-9]*)\.([1-9][0-9]*)([^\w].*|)$}{$e->($1).qq|<a href="/$2.$3">$2.$3</a>|.$e->($4)}e ||
- s{^(.*[^\w]|)([tdvprug][1-9][0-9]*)([^\w].*|)$}{$e->($1).qq|<a href="/$2">$2</a>|.$e->($3)}e)) {
- $length += length $lit;
- last if $maxlength && $length > $maxlength;
- $result .= $_;
+ $result .= sprintf '<a href="%s" rel="nofollow">link</a>', $url;
next;
}
- } elsif($open[$#open] eq 'raw' && lc$_ eq '[/raw]') {
+ # id
+ if($id || $exid) {
+ my $r = $id || $exid;
+ if(substr($raw, $last-1-length($r), 1) !~ /[\w]/ && substr($raw, $last, 1) !~ /[\w]/) {
+ $length += length $r;
+ last if $maxlength && $length > $maxlength;
+ $result .= sprintf '<a href="/%s">%1$s</a>', $r;
+ next
+ }
+ }
+ }
+
+ if($tag && $open[$#open] eq 'raw' && lc$tag eq '[/raw]') {
pop @open;
next;
- } elsif($open[$#open] eq 'code' && lc$_ eq '[/code]') {
+ }
+
+ if($tag && $open[$#open] eq 'code' && lc$tag eq '[/code]') {
$result .= '</pre>' if !$maxlength;
pop @open;
next;
}
- # normal text processing
- $length += length $_;
+ # We'll only get here when the bbcode input isn't correct or something else
+ # didn't work out. In that case, just output whatever we've matched.
+ $result .= $e->($match);
last if $maxlength && $length > $maxlength;
- $result .= $e->($_);
}
+ # the last unmatched part, just escape and output
+ $result .= $e->(substr $raw, $last);
+
# close open tags
while((local $_ = pop @open) ne 'first') {
$result .= $_ eq 'url' ? '</a>' : $_ eq 'spoiler' ? '</b>' : '';