From f51c7276fc65c2d1d46d3c8f915fb1e1412693e8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 7 Sep 2015 13:58:01 +0200 Subject: Handler::Discussions: Use ts_headline() to format search results And also fix strip_bb_tags() to be case-insensitive and fix a bug in converting the query into a tsquery. --- lib/VNDB/DB/Discussions.pm | 22 ++++++++++++++++++++-- lib/VNDB/Handler/Discussions.pm | 16 +++++++++++++--- util/sql/func.sql | 8 +++++++- util/updates/update_2.25.sql | 8 +++++++- 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/lib/VNDB/DB/Discussions.pm b/lib/VNDB/DB/Discussions.pm index 723170e2..a85205f2 100644 --- a/lib/VNDB/DB/Discussions.pm +++ b/lib/VNDB/DB/Discussions.pm @@ -193,9 +193,10 @@ sub dbPostGet { ); my @select = ( - qw|tp.num tp.msg tp.hidden|, q|extract('epoch' from tp.date) as date|, q|extract('epoch' from tp.edited) as edited|, + qw|tp.tid tp.num tp.hidden|, q|extract('epoch' from tp.date) as date|, q|extract('epoch' from tp.edited) as edited|, + $o{search} ? () : 'tp.msg', $o{what} =~ /user/ ? qw|tp.uid u.username| : (), - $o{what} =~ /thread/ ? (qw|tp.tid t.title|, 't.hidden AS thread_hidden') : (), + $o{what} =~ /thread/ ? ('t.title', 't.hidden AS thread_hidden') : (), ); my @join = ( $o{what} =~ /user/ ? 'JOIN users u ON u.id = tp.uid' : (), @@ -216,6 +217,23 @@ sub dbPostGet { join(', ', @select), join(' ', @join), \%where, $order ); + # Get headlines in a separate query + if($o{search} && $#$r) { + my %r = map { + ($r->[$_]{tid}.'.'.$r->[$_]{num}, $_) + } 0..$#$r; + my $where = join ' or ', ('(tid = ? and num = ?)')x@$r; + my @where = map +($_->{tid},$_->{num}), @$r; + my $h = join ',', map "$_=$o{headline}{$_}", $o{headline} ? keys %{$o{headline}} : (); + + $r->[$r{$_->{tid}.'.'.$_->{num}}]{headline} = $_->{headline} for (@{$self->dbAll(qq| + SELECT tid, num, ts_headline('english', strip_bb_tags(strip_spoilers(msg)), to_tsquery(?), ?) as headline + FROM threads_posts + WHERE $where|, + $o{search}, $h, @where + )}); + } + return wantarray ? ($r, $np) : $r; } diff --git a/lib/VNDB/Handler/Discussions.pm b/lib/VNDB/Handler/Discussions.pm index 27548baf..0362dce3 100644 --- a/lib/VNDB/Handler/Discussions.pm +++ b/lib/VNDB/Handler/Discussions.pm @@ -415,7 +415,7 @@ sub search { # TODO: Allow or-matching too. But what syntax? (my $ts = $frm->{bq}) =~ y{+|&:*()="';!?$%^\\[]{}<>~` }{ }s; $ts =~ s/ / & /g; - $ts =~ y/-/!/; + $ts =~ s/(^| )-([^ ]+)/ !$1 /; ($l, $np) = $self->dbPostGet( keys %boards ? ( type => [keys %boards] ) : (), search => $ts, @@ -424,6 +424,13 @@ sub search { hide => 1, what => 'thread user', sort => 'date', reverse => 1, + headline => { + # HACK: The bbcodes are stripped from the original messages when + # creating the headline, so they are guaranteed not to show up in the + # message. This means we can re-use them for highlighting without + # worrying that they conflict with the message contents. + MaxFragments => 2, MinWords => 15, MaxWords => 40, StartSel => '[raw]', StopSel => '[/raw]', FragmentDelimiter => '[code]', + }, ); } @@ -460,9 +467,12 @@ sub search { div class => 'title'; a href => $link, $l->{title}; end; - # TODO: ts_headline() or something like it. + my $h = xml_escape $l->{headline}; + $h =~ s/\[raw\]//g; + $h =~ s/\[\/raw\]/<\/b>/g; + $h =~ s/\[code\]/...<\/b>
/g; div class => 'thread'; - lit bb2html($l->{msg}, 300); + lit $h; end; end; end; diff --git a/util/sql/func.sql b/util/sql/func.sql index 6bd483de..6f675479 100644 --- a/util/sql/func.sql +++ b/util/sql/func.sql @@ -13,10 +13,16 @@ -- strip_bb_tags(text) - simple utility function to aid full-text searching CREATE OR REPLACE FUNCTION strip_bb_tags(t text) RETURNS text AS $$ - SELECT regexp_replace(t, '\[(?:url=[^\]]+|/?(?:spoiler|quote|raw|code|url))\]', ' ', 'g'); + SELECT regexp_replace(t, '\[(?:url=[^\]]+|/?(?:spoiler|quote|raw|code|url))\]', ' ', 'gi'); $$ LANGUAGE sql IMMUTABLE; +-- BUG: Since this isn't a full bbcode parser, [spoiler] tags inside [raw] or [code] are still considered spoilers. +CREATE OR REPLACE FUNCTION strip_spoilers(t text) RETURNS text AS $$ + -- The website doesn't require the [spoiler] tag to be closed, the outer replace catches that case. + SELECT regexp_replace(regexp_replace(t, '\[spoiler\].*?\[/spoiler\]', ' ', 'ig'), '\[spoiler\].*', ' ', 'i'); +$$ LANGUAGE sql IMMUTABLE; + -- update_vncache(id) - updates the c_* columns in the vn table CREATE OR REPLACE FUNCTION update_vncache(integer) RETURNS void AS $$ diff --git a/util/updates/update_2.25.sql b/util/updates/update_2.25.sql index bba0662b..138ec7a0 100644 --- a/util/updates/update_2.25.sql +++ b/util/updates/update_2.25.sql @@ -53,7 +53,13 @@ ALTER TABLE threads_boards ALTER COLUMN type TYPE board_type USING trim(type)::b -- Full-text board search CREATE OR REPLACE FUNCTION strip_bb_tags(t text) RETURNS text AS $$ - SELECT regexp_replace(t, '\[(?:url=[^\]]+|/?(?:spoiler|quote|raw|code|url))\]', ' ', 'g'); + SELECT regexp_replace(t, '\[(?:url=[^\]]+|/?(?:spoiler|quote|raw|code|url))\]', ' ', 'gi'); $$ LANGUAGE sql IMMUTABLE; CREATE INDEX threads_posts_ts ON threads_posts USING gin(to_tsvector('english', strip_bb_tags(msg))); + +-- BUG: Since this isn't a full bbcode parser, [spoiler] tags inside [raw] or [code] are still considered spoilers. +CREATE OR REPLACE FUNCTION strip_spoilers(t text) RETURNS text AS $$ + -- The website doesn't require the [spoiler] tag to be closed, the outer replace catches that case. + SELECT regexp_replace(regexp_replace(t, '\[spoiler\].*?\[/spoiler\]', ' ', 'ig'), '\[spoiler\].*', ' ', 'i'); +$$ LANGUAGE sql IMMUTABLE; -- cgit v1.2.3