From 438d4df64d950f8905bd31bddc203d408f42f125 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 7 Sep 2015 01:35:48 +0200 Subject: Implement discussion board search function Inspired by wakaranai's implementation at https://github.com/morkt/vndb/commit/b852c87ad145fdaaa09c79b6378dd819b46f7e87 This version is different in a number of aspects: - Separate search functions for title search and fulltext post search. Perhaps not the most convenient option, but the downside of a combined search is that if the query matches the threads' title, then all of the posts in that thread will show up in the results. This didn't seem very useful. - Sorting is based purely on post date. Rank-based sort is slow without a separate caching column, and in my opinion not all that useful. Implementation differences: - Integrated in the existing DB::Discussions functions, so less code to maintain and more code reuse. - No separate caching column for the tsvector, a functional index is used instead. This is a bit slower (index results need to be re-checked against the actual messages, hence the slowdown), but has the advantage of smaller database dumps and less complexity in updating the cache. Things to fix or look at: - Highlighting of the search query in message contents. - Allow or-style query matching --- data/lang.txt | 113 +++++++++++++++++++++++++++++++++++++- data/style.css | 5 ++ lib/VNDB/DB/Discussions.pm | 21 +++++-- lib/VNDB/Handler/Discussions.pm | 119 +++++++++++++++++++++++++++++++++++++--- lib/VNDB/Util/FormHTML.pm | 3 +- util/sql/all.sql | 4 ++ util/sql/func.sql | 6 ++ util/updates/update_2.25.sql | 8 +++ 8 files changed, 265 insertions(+), 14 deletions(-) diff --git a/data/lang.txt b/data/lang.txt index db3fba56..ea7677bf 100644 --- a/data/lang.txt +++ b/data/lang.txt @@ -4820,7 +4820,118 @@ uk : Головна сторінка форуму it : Indice area messaggi -# Thread list (on discussion board index and board browser) +# Discussion board search + +:_dissearch_title +en : Search the discussion board +ru*: +cs*: +hu*: +nl*: +de*: +es*: +tr*: +uk*: +it*: + +:_dissearch_query +en : Query +ru*: +cs*: +hu*: +nl*: +de*: +es*: +tr*: +uk*: +it*: + +:_dissearch_titleonly +en : Only search thread titles +ru*: +cs*: +hu*: +nl*: +de*: +es*: +tr*: +uk*: +it*: + +:_dissearch_boards +en : Boars +ru*: +cs*: +hu*: +nl*: +de*: +es*: +tr*: +uk*: +it*: + +:_dissearch_noresults_title +en : No results +ru*: +cs*: +hu*: +nl*: +de*: +es*: +tr*: +uk*: +it*: + +:_dissearch_noresults_msg +en : No threads or messages found matching your criteria. +ru*: +cs*: +hu*: +nl*: +de*: +es*: +tr*: +uk*: +it*: + +:_dissearch_col_date +en : Date +ru*: +cs*: +hu*: +nl*: +de*: +es*: +tr*: +uk*: +it*: + +:_dissearch_col_user +en : User +ru*: +cs*: +hu*: +nl*: +de*: +es*: +tr*: +uk*: +it*: + +:_dissearch_col_msg +en : Message +ru*: +cs*: +hu*: +nl*: +de*: +es*: +tr*: +uk*: +it*: + + +# Thread list (on discussion board index, search and board browser) :_threadlist_col_topic en : Topic diff --git a/data/style.css b/data/style.css index ed489aa9..736fb910 100644 --- a/data/style.css +++ b/data/style.css @@ -275,6 +275,7 @@ p#searchtabs a:hover, p#searchtabs a.sel { background: $secbg$ url($_boxbg$) repeat; } #q { width: 600px } +#bq { width: 300px } @@ -313,6 +314,10 @@ div.mainbox.discussions b.boards a { color: $grayedout$; } div.discussions td.tc2 { width: 50px; } div.discussions td.tc3 { width: 90px; } div.discussions td.tc4 { width: 170px; } +div.postsearch td.tc1_1 { width: 60px; padding-left: 0; padding-right: 0; text-align: right } +div.postsearch td.tc1_2 { width: 25px; padding-left: 0 } +div.postsearch td.tc2 { width: 65px; } +div.postsearch td.tc3 { width: 90px; } h1.boxtitle, h1.boxtitle a { font-family: "Futura", "Century New Gothic", "Arial", Serif; font-weight: bold; diff --git a/lib/VNDB/DB/Discussions.pm b/lib/VNDB/DB/Discussions.pm index 574560a0..723170e2 100644 --- a/lib/VNDB/DB/Discussions.pm +++ b/lib/VNDB/DB/Discussions.pm @@ -8,7 +8,7 @@ use Exporter 'import'; our @EXPORT = qw|dbThreadGet dbThreadEdit dbThreadAdd dbPostGet dbPostEdit dbPostAdd dbThreadCount|; -# Options: id, type, iid, results, page, what, notusers, sort, reverse +# Options: id, type, iid, results, page, what, notusers, search, sort, reverse # What: boards, boardtitles, firstpost, lastpost # Sort: id lastpost sub dbThreadGet { @@ -17,19 +17,26 @@ sub dbThreadGet { $o{page} ||= 1; $o{what} ||= ''; - my %where = ( + my @where = ( $o{id} ? ( 't.id = ?' => $o{id} ) : (), !$o{id} ? ( 't.hidden = FALSE' => 0 ) : (), $o{type} && !$o{iid} ? ( - 't.id IN(SELECT tid FROM threads_boards WHERE type = ?)' => $o{type} ) : (), + 't.id IN(SELECT tid FROM threads_boards WHERE type IN(!l))' => [ ref $o{type} ? $o{type} : [ $o{type} ] ] ) : (), $o{type} && $o{iid} ? ( 'tb.type = ?' => $o{type}, 'tb.iid = ?' => $o{iid} ) : (), $o{notusers} ? ( 't.id NOT IN(SELECT tid FROM threads_boards WHERE type = \'u\')' => 1) : (), ); + if($o{search}) { + for (split /[ -,._]/, $o{search}) { + s/%//g; + push @where, 't.title ilike ?', "%$_%" if length($_) > 0; + } + } + my @select = ( qw|t.id t.title t.count t.locked t.hidden|, $o{what} =~ /firstpost/ ? ('tpf.uid AS fuid', q|EXTRACT('epoch' from tpf.date) AS fdate|, 'uf.username AS fusername') : (), @@ -60,7 +67,7 @@ sub dbThreadGet { !s !W ORDER BY !s|, - join(', ', @select), join(' ', @join), \%where, $order + join(', ', @select), join(' ', @join), \@where, $order ); if($o{what} =~ /(boards|boardtitles)/ && $#$r >= 0) { @@ -158,7 +165,7 @@ sub dbThreadCount { } -# Options: tid, num, what, uid, mindate, hide, page, results, sort, reverse +# Options: tid, num, what, uid, mindate, hide, search, type, page, results, sort, reverse # what: user thread sub dbPostGet { my($self, %o) = @_; @@ -179,6 +186,10 @@ sub dbPostGet { 'tp.hidden = FALSE' => 1 ) : (), $o{hide} && $o{what} =~ /thread/ ? ( 't.hidden = FALSE' => 1 ) : (), + $o{search} ? ( + q{to_tsvector('english', strip_bb_tags(msg)) @@ to_tsquery(?)} => $o{search}) : (), + $o{type} ? ( + 'tp.tid = ANY(ARRAY(SELECT tid FROM threads_boards WHERE type IN(!l)))' => [ ref $o{type} ? $o{type} : [ $o{type} ] ] ) : (), ); my @select = ( diff --git a/lib/VNDB/Handler/Discussions.pm b/lib/VNDB/Handler/Discussions.pm index 755d14c9..d416d26b 100644 --- a/lib/VNDB/Handler/Discussions.pm +++ b/lib/VNDB/Handler/Discussions.pm @@ -3,7 +3,7 @@ package VNDB::Handler::Discussions; use strict; use warnings; -use TUWF ':html', 'xml_escape'; +use TUWF ':html', 'xml_escape', 'uri_escape'; use POSIX 'ceil'; use VNDB::Func; @@ -15,6 +15,7 @@ TUWF::register( qr{t([1-9]\d*)/reply} => \&edit, qr{t([1-9]\d*)\.([1-9]\d*)/edit} => \&edit, qr{t/(db|an|ge|[vpu])([1-9]\d*)?/new} => \&edit, + qr{t/search} => \&search, qr{t} => \&index, ); @@ -341,12 +342,18 @@ sub index { my $self = shift; $self->htmlHeader(title => mt('_disindex_title'), noindex => 1, feeds => [ 'posts', 'announcements' ]); - div class => 'mainbox'; - h1 mt '_disindex_title'; - p class => 'browseopts'; - a href => '/t/all', mt '_disboard_item_all'; - a href => '/t/'.$_, mt "_dboard_$_" - for (@{$self->{discussion_boards}}); + form action => '/t/search', method => 'get'; + div class => 'mainbox'; + h1 mt '_disindex_title'; + fieldset class => 'search'; + input type => 'text', name => 'bq', id => 'bq', class => 'text'; + input type => 'submit', class => 'submit', value => mt '_searchbox_submit'; + end 'fieldset'; + p class => 'browseopts'; + a href => '/t/all', mt '_disboard_item_all'; + a href => '/t/'.$_, mt "_dboard_$_" + for (@{$self->{discussion_boards}}); + end; end; end; @@ -368,6 +375,104 @@ sub index { } +sub search { + my $self = shift; + + my $frm = $self->formValidate( + { get => 'bq', required => 0, maxlength => 100 }, + { get => 'b', required => 0, multi => 1, enum => $self->{discussion_boards} }, + { get => 't', required => 0 }, + { get => 'p', required => 0, default => 1, template => 'int' }, + ); + return $self->resNotFound if $frm->{_err}; + + $self->htmlHeader(title => mt('_dissearch_title'), noindex => 1); + $self->htmlForm({ frm => $frm, action => '/t/search', method => 'get', nosubmit => 1 }, 'boardsearch' => [mt('_dissearch_title'), + [ input => short => 'bq', name => mt('_dissearch_query') ], + [ check => short => 't', name => mt('_dissearch_titleonly') ], + [ select => short => 'b', name => mt('_dissearch_boards'), multi => 1, size => scalar @{$self->{discussion_boards}}, + options => [ map [$_,mt("_dboard_$_")], @{$self->{discussion_boards}} ] ], + [ static => content => sub { + input type => 'submit', class => 'submit', tabindex => 10, value => mt '_searchbox_submit'; + } ], + ]); + return $self->htmlFooter if !$frm->{bq}; + + my %boards = map +($_,1), @{$frm->{b}}; + %boards = () if keys %boards == @{$self->{discussion_boards}}; + + my($l, $np); + if($frm->{t}) { + ($l, $np) = $self->dbThreadGet( + keys %boards ? ( type => [keys %boards] ) : (), + search => $frm->{bq}, + results => 50, + page => $frm->{p}, + what => 'firstpost lastpost boardtitles', + sort => 'lastpost', reverse => 1, + ); + } else { + # TODO: Allow or-matching too. But what syntax? + (my $ts = $frm->{bq}) =~ y{+|&:*()="';!?$%^\\[]{}<>~` }{ }s; + $ts =~ s/ / & /g; + $ts =~ y/-/!/; + ($l, $np) = $self->dbPostGet( + keys %boards ? ( type => [keys %boards] ) : (), + search => $ts, + results => 20, + page => $frm->{p}, + hide => 1, + what => 'thread user', + sort => 'date', reverse => 1, + ); + } + + my $url = '/t/search?'.join ';', 'bq='.uri_escape($frm->{bq}), $frm->{t} ? 't=1' : (), map "b=$_", keys %boards; + if(!@$l) { + div class => 'mainbox'; + h1 mt '_dissearch_noresults_title'; + p mt '_dissearch_noresults_msg'; + end; + } elsif($frm->{t}) { + _threadlist($self, $l, $frm, $np, $url, 'all'); + } else { + $self->htmlBrowse( + items => $l, + options => $frm, + nextpage => $np, + pageurl => $url, + class => 'postsearch', + header => [ + sub { td class => 'tc1_1', ''; td class => 'tc1_2', ''; }, + [ mt '_dissearch_col_date' ], + [ mt '_dissearch_col_user' ], + [ mt '_dissearch_col_msg' ], + ], + row => sub { + my($s, $n, $l) = @_; + my $link = "/t$l->{tid}.$l->{num}"; + Tr; + td class => 'tc1_1'; a href => $link, 't'.$l->{tid}; end; + td class => 'tc1_2'; a href => $link, '.'.$l->{num}; end; + td class => 'tc2', $self->{l10n}->date($l->{date}); + td class => 'tc3'; lit $self->{l10n}->userstr($l->{uid}, $l->{username}); end; + td class => 'tc4'; + div class => 'title'; + a href => $link, $l->{title}; + end; + # TODO: ts_headline() or something like it. + div class => 'thread'; + lit bb2html($l->{msg}, 300); + end; + end; + end; + } + ); + } + $self->htmlFooter; +} + + sub _threadlist { my($self, $list, $f, $np, $url, $board) = @_; $self->htmlBrowse( diff --git a/lib/VNDB/Util/FormHTML.pm b/lib/VNDB/Util/FormHTML.pm index 4a63a9b7..041d5a14 100644 --- a/lib/VNDB/Util/FormHTML.pm +++ b/lib/VNDB/Util/FormHTML.pm @@ -168,6 +168,7 @@ sub htmlFormPart { # Generates a form, first argument is a hashref with global options, keys: # frm => the $frm as returned by formValidate, # action => The location the form should POST to (also used as form id) +# method => post/get # upload => 1/0, adds an enctype. # nosubmit => 1/0, hides the submit button # editsum => 1/0, adds an edit summary field before the submit button @@ -178,7 +179,7 @@ sub htmlFormPart { # automatically calls htmlFormError and adds a 'formcode' field. sub htmlForm { my($self, $options, @subs) = @_; - form action => '/nospam?'.$options->{action}, method => 'post', 'accept-charset' => 'utf-8', + form action => '/nospam?'.$options->{action}, method => $options->{method}||'post', 'accept-charset' => 'utf-8', $options->{upload} ? (enctype => 'multipart/form-data') : (); div class => 'hidden'; diff --git a/util/sql/all.sql b/util/sql/all.sql index 833f5890..a1689149 100644 --- a/util/sql/all.sql +++ b/util/sql/all.sql @@ -123,6 +123,10 @@ CREATE SEQUENCE covers_seq; CREATE SEQUENCE charimg_seq; +-- Discussion board search index (can't put this in schema.sql because it refers to func.sql) +CREATE INDEX threads_posts_ts ON threads_posts USING gin(to_tsvector('english', strip_bb_tags(msg))); + + -- Rows that are assumed to be available INSERT INTO users (id, username, mail, perm) VALUES (0, 'deleted', 'del@vndb.org', 0); INSERT INTO users (username, mail, perm) VALUES ('multi', 'multi@vndb.org', 0); diff --git a/util/sql/func.sql b/util/sql/func.sql index f7125607..6bd483de 100644 --- a/util/sql/func.sql +++ b/util/sql/func.sql @@ -11,6 +11,12 @@ -- with that, either. +-- strip_bb_tags(text) - simple utility function to aid full-text searching +CREATE OR REPLACE FUNCTION strip_bb_tags(t text) RETURNS text AS $$ + SELECT regexp_replace(t, '\[(?:url=[^\]]+|/?(?:spoiler|quote|raw|code|url))\]', ' ', 'g'); +$$ LANGUAGE sql IMMUTABLE; + + -- update_vncache(id) - updates the c_* columns in the vn table CREATE OR REPLACE FUNCTION update_vncache(integer) RETURNS void AS $$ diff --git a/util/updates/update_2.25.sql b/util/updates/update_2.25.sql index 8207adc6..bba0662b 100644 --- a/util/updates/update_2.25.sql +++ b/util/updates/update_2.25.sql @@ -49,3 +49,11 @@ ALTER TYPE prefs_key ADD VALUE 'traits_sexual'; CREATE TYPE board_type AS ENUM ('an', 'db', 'ge', 'v', 'p', 'u'); ALTER TABLE threads_boards ALTER COLUMN type DROP DEFAULT; ALTER TABLE threads_boards ALTER COLUMN type TYPE board_type USING trim(type)::board_type; + + +-- Full-text board search +CREATE OR REPLACE FUNCTION strip_bb_tags(t text) RETURNS text AS $$ + SELECT regexp_replace(t, '\[(?:url=[^\]]+|/?(?:spoiler|quote|raw|code|url))\]', ' ', 'g'); +$$ LANGUAGE sql IMMUTABLE; + +CREATE INDEX threads_posts_ts ON threads_posts USING gin(to_tsvector('english', strip_bb_tags(msg))); -- cgit v1.2.3