summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2016-07-03 15:14:49 +0200
committerYorhel <git@yorhel.nl>2016-07-03 15:14:49 +0200
commite2cd8349be295dff3fd2b07f1b1c8282377a1ffd (patch)
tree6201ae5b4635359222999b4f82b4227f8f26116e
parent5648ca9997e4d986799bccd877deaf72b47e48a0 (diff)
Generalize substring search relevance + apply to most dropdown searches
This is a generalization of the search improvements made in 7da2edeaa0f6cf7794f4f8f68960497dc1be893c and 92235222dba4e5d0c7713d53ef12e0f10e371b83 And has been applied to the dropdown searches for producers, staff, tags and traits. For all those searches, exact matches are listed first, followed by prefix matches, and then substring matches. Relevance is currently only based on the primary name/title and ignores aliases (except for staff). This is fixable, but not trivial, and I'm not sure it's all that useful.
-rw-r--r--lib/VNDB/DB/Producers.pm14
-rw-r--r--lib/VNDB/DB/Staff.pm15
-rw-r--r--lib/VNDB/DB/Tags.pm11
-rw-r--r--lib/VNDB/DB/Traits.pm13
-rw-r--r--lib/VNDB/Handler/Producers.pm2
-rw-r--r--lib/VNDB/Handler/Staff.pm16
-rw-r--r--lib/VNDB/Handler/Tags.pm2
-rw-r--r--lib/VNDB/Handler/Traits.pm18
-rw-r--r--util/sql/func.sql15
-rw-r--r--util/updates/update_2.26.sql11
10 files changed, 66 insertions, 51 deletions
diff --git a/lib/VNDB/DB/Producers.pm b/lib/VNDB/DB/Producers.pm
index 929e333b..133c3f31 100644
--- a/lib/VNDB/DB/Producers.pm
+++ b/lib/VNDB/DB/Producers.pm
@@ -8,7 +8,7 @@ use Exporter 'import';
our @EXPORT = qw|dbProducerGet dbProducerGetRev dbProducerRevisionInsert|;
-# options: results, page, id, search, char
+# options: results, page, id, search, char, sort
# what: extended relations relgraph
sub dbProducerGet {
my $self = shift;
@@ -40,13 +40,19 @@ sub dbProducerGet {
$select .= ', p.desc, p.alias, p.website, p.l_wp, p.hidden, p.locked' if $o{what} =~ /extended/;
$select .= ', pg.svg' if $o{what} =~ /relgraph/;
- my($r, $np) = $self->dbPage(\%o, q|
+ my($order, @order) = ('p.name');
+ if($o{sort} && $o{sort} eq 'search') {
+ $order = 'least(substr_score(p.name, ?), substr_score(p.original, ?)), p.name';
+ @order = ($o{search}) x 2;
+ }
+
+ my($r, $np) = $self->dbPage(\%o, qq|
SELECT !s
FROM producers p
!s
!W
- ORDER BY p.name ASC|,
- $select, $join, \%where,
+ ORDER BY $order|,
+ $select, $join, \%where, @order
);
return _enrich($self, $r, $np, 0, $o{what});
diff --git a/lib/VNDB/DB/Staff.pm b/lib/VNDB/DB/Staff.pm
index 45e6df46..bf2ae325 100644
--- a/lib/VNDB/DB/Staff.pm
+++ b/lib/VNDB/DB/Staff.pm
@@ -7,7 +7,7 @@ use Exporter 'import';
our @EXPORT = qw|dbStaffGet dbStaffGetRev dbStaffRevisionInsert dbStaffAliasIds|;
-# options: results, page, id, aid, search, exact, truename, role, gender, notid
+# options: results, page, id, aid, search, exact, truename, role, gender
# what: extended changes roles aliases
sub dbStaffGet {
my $self = shift;
@@ -35,7 +35,6 @@ sub dbStaffGet {
$o{id} ? ( ref $o{id} ? ('s.id IN(!l)' => [$o{id}]) : ('s.id = ?' => $o{id}) ) : (),
$o{aid} ? ( ref $o{aid} ? ('sa.aid IN(!l)' => [$o{aid}]) : ('sa.aid = ?' => $o{aid}) ) : (),
$o{id} || $o{truename} ? ( 's.aid = sa.aid' => 1 ) : (),
- $o{notid} && @{$o{notid}} ? ('s.id NOT IN(!l)' => [$o{notid}]) : (),
defined $o{gender} ? ( 's.gender IN(!l)' => [ ref $o{gender} ? $o{gender} : [$o{gender}] ]) : (),
defined $o{lang} ? ( 's.lang IN(!l)' => [ ref $o{lang} ? $o{lang} : [$o{lang}] ]) : (),
defined $o{role} ? (
@@ -60,13 +59,19 @@ sub dbStaffGet {
my $select = 's.id, sa.aid, sa.name, sa.original, s.gender, s.lang';
$select .= ', s.desc, s.l_wp, s.l_site, s.l_twitter, s.l_anidb, s.hidden, s.locked' if $o{what} =~ /extended/;
- my($r, $np) = $self->dbPage(\%o, q|
+ my($order, @order) = ('sa.name');
+ if($o{sort} && $o{sort} eq 'search') {
+ $order = 'least(substr_score(sa.name, ?), substr_score(sa.original, ?)), sa.name';
+ @order = ($o{search}) x 2;
+ }
+
+ my($r, $np) = $self->dbPage(\%o, qq|
SELECT !s
FROM staff s
JOIN staff_alias sa ON sa.id = s.id
!W
- ORDER BY sa.name|,
- $select, \%where
+ ORDER BY $order|,
+ $select, \%where, @order
);
return _enrich($self, $r, $np, 0, $o{what});
diff --git a/lib/VNDB/DB/Tags.pm b/lib/VNDB/DB/Tags.pm
index 10b45a4b..8c27e55f 100644
--- a/lib/VNDB/DB/Tags.pm
+++ b/lib/VNDB/DB/Tags.pm
@@ -10,7 +10,7 @@ our @EXPORT = qw|dbTagGet dbTTTree dbTagEdit dbTagAdd dbTagMerge dbTagLinks dbTa
# %options->{ id noid name search state meta page results what sort reverse }
# what: parents childs(n) aliases addedby
-# sort: id name added items
+# sort: id name added items search
sub dbTagGet {
my $self = shift;
my %o = (
@@ -50,15 +50,18 @@ sub dbTagGet {
name => 't.name %s',
added => 't.added %s',
items => 't.c_items %s',
+ search=> 'substr_score(t.name, ?) ASC, t.name %s', # Assigning a matching score for aliases is also possible, but more involved
}->{ $o{sort}||'id' }, $o{reverse} ? 'DESC' : 'ASC';
+ my @order = $o{sort} && $o{sort} eq 'search' ? ($o{search}) : ();
- my($r, $np) = $self->dbPage(\%o, q|
+
+ my($r, $np) = $self->dbPage(\%o, qq|
SELECT !s
FROM tags t
!s
!W
- ORDER BY !s|,
- join(', ', @select), join(' ', @join), \%where, $order
+ ORDER BY $order|,
+ join(', ', @select), join(' ', @join), \%where, @order
);
if(@$r && $o{what} =~ /aliases/) {
diff --git a/lib/VNDB/DB/Traits.pm b/lib/VNDB/DB/Traits.pm
index f2eadaba..728e4e85 100644
--- a/lib/VNDB/DB/Traits.pm
+++ b/lib/VNDB/DB/Traits.pm
@@ -15,7 +15,7 @@ our @EXPORT = qw|dbTraitGet dbTraitEdit dbTraitAdd|;
# Options: id noid search name state what results page sort reverse
# what: parents childs(n) addedby
-# sort: id name name added items
+# sort: id name name added items search
sub dbTraitGet {
my $self = shift;
my %o = (
@@ -30,8 +30,7 @@ sub dbTraitGet {
my %where = (
$o{id} ? ( 't.id IN(!l)' => [ ref($o{id}) ? $o{id} : [$o{id}] ]) : (),
$o{group} ? ( 't.group = ?' => $o{group} ) : (),
- $o{noid} && ref($o{noid}) && @{$o{noid}} ? ('t.id NOT IN(!l)', [$o{noid}]) : (),
- $o{noid} && !ref($o{noid}) ? ('t.id <> ?' => $o{noid}) : (),
+ $o{noid} ? ( 't.id <> ?' => $o{noid} ) : (),
defined $o{state} && $o{state} != -1 ? (
't.state = ?' => $o{state} ) : (),
!defined $o{state} && !$o{id} && !$o{name} ? (
@@ -56,15 +55,17 @@ sub dbTraitGet {
group => 'tg."order" %s, t.name %1$s',
added => 't.added %s',
items => 't.c_items %s',
+ search=> 'substr_score(t.name, ?) ASC, t.name %s', # Can't score aliases at the moment
}->{ $o{sort}||'id' }, $o{reverse} ? 'DESC' : 'ASC';
+ my @order = $o{sort} && $o{sort} eq 'search' ? ($o{search}) : ();
- my($r, $np) = $self->dbPage(\%o, q|
+ my($r, $np) = $self->dbPage(\%o, qq|
SELECT !s
FROM traits t
!s
!W
- ORDER BY !s|,
- join(', ', @select), join(' ', @join), \%where, $order
+ ORDER BY $order|,
+ join(', ', @select), join(' ', @join), \%where, @order,
);
if($o{what} =~ /parents\((\d+)\)/) {
diff --git a/lib/VNDB/Handler/Producers.pm b/lib/VNDB/Handler/Producers.pm
index f3a167c2..8371d9a1 100644
--- a/lib/VNDB/Handler/Producers.pm
+++ b/lib/VNDB/Handler/Producers.pm
@@ -406,7 +406,7 @@ sub pxml {
$q = $q->{q};
my($list, $np) = $self->dbProducerGet(
- $q =~ /^p([1-9]\d*)/ ? (id => $1) : (search => $q),
+ $q =~ /^p([1-9]\d*)/ ? (id => $1) : (search => $q, sort => 'search'),
results => 10,
page => 1,
);
diff --git a/lib/VNDB/Handler/Staff.pm b/lib/VNDB/Handler/Staff.pm
index 24edf5ad..378bbd91 100644
--- a/lib/VNDB/Handler/Staff.pm
+++ b/lib/VNDB/Handler/Staff.pm
@@ -370,25 +370,11 @@ sub staffxml {
my $q = $self->formValidate({ get => 'q', required => 0, maxlength => 500 });
return $self->resNotFound if $q->{_err} || !$q->{q};
- # Prefixing name with '=' forces exact matching, old behaviour, not very useful anymore.
- my $exact = $q->{q} =~ s/^=//;
-
- # Try exact match first, so exact match is always first result
my($list, $np) = $self->dbStaffGet(
+ $q->{q} =~ /^s([1-9]\d*)/ ? (id => $1) : $q->{q} =~ /^=(.+)/ ? (exact => $1) : (search => $q->{q}, sort => 'search'),
results => 10, page => 1,
- $q->{q} =~ /^s([1-9]\d*)/ ? (id => $1) : (exact => $q->{q}),
);
- # Append results of a substring match
- if(!$np && !$exact) {
- my($nlist, $nnp) = $self->dbStaffGet(
- results => 10-@$list, page => 1, search => $q->{q},
- notid => [ map $_->{id}, @$list ],
- );
- $np = $nnp;
- $list = [ @$list, @$nlist ];
- }
-
$self->resHeader('Content-type' => 'text/xml; charset=UTF-8');
xml;
tag 'staff', more => $np ? 'yes' : 'no';
diff --git a/lib/VNDB/Handler/Tags.pm b/lib/VNDB/Handler/Tags.pm
index 61a3e2b3..43215fde 100644
--- a/lib/VNDB/Handler/Tags.pm
+++ b/lib/VNDB/Handler/Tags.pm
@@ -743,7 +743,7 @@ sub tagxml {
return $self->resNotFound if $f->{_err} || (!$f->{q} && !$f->{id} && !$f->{id}[0]);
my($list, $np) = $self->dbTagGet(
- !$f->{q} ? () : $f->{q} =~ /^g([1-9]\d*)/ ? (id => $1) : $f->{q} =~ /^name:(.+)$/ ? (name => $1) : (search => $f->{q}),
+ !$f->{q} ? () : $f->{q} =~ /^g([1-9]\d*)/ ? (id => $1) : $f->{q} =~ /^=(.+)$/ ? (name => $1) : (search => $f->{q}, sort => 'search'),
$f->{id} && $f->{id}[0] ? (id => $f->{id}) : (),
results => 15,
page => 1,
diff --git a/lib/VNDB/Handler/Traits.pm b/lib/VNDB/Handler/Traits.pm
index 811815c3..0f4169aa 100644
--- a/lib/VNDB/Handler/Traits.pm
+++ b/lib/VNDB/Handler/Traits.pm
@@ -409,26 +409,14 @@ sub traitxml {
);
return $self->resNotFound if $f->{_err} || (!$f->{q} && !$f->{id} && !$f->{id}[0]);
- # First try an exact match
my($list, $np) = $self->dbTraitGet(
- !$f->{q} ? () : $f->{q} =~ /^i([1-9]\d*)/ ? (id => $1) : (name => $f->{q}),
- $f->{id} && $f->{id}[0] ? (id => $f->{id}) : (),
results => $f->{r},
page => 1,
- sort => 'group'
+ sort => 'group',
+ !$f->{q} ? () : $f->{q} =~ /^i([1-9]\d*)/ ? (id => $1) : (search => $f->{q}, sort => 'search'),
+ $f->{id} && $f->{id}[0] ? (id => $f->{id}) : (),
);
- # Fill up the results with substring matches
- if(!$np && $f->{q} && !($f->{id} && $f->{id}[0])) {
- my($nlist, $nnp) = $self->dbTraitGet(
- results => $f->{r}-@$list, page => 1,
- search => $f->{q}, sort => 'group',
- noid => [ map $_->{id}, @$list ]
- );
- $np = $nnp;
- $list = [ @$list, @$nlist ];
- }
-
$self->resHeader('Content-type' => 'text/xml; charset=UTF-8');
xml;
tag 'traits', more => $np ? 'yes' : 'no';
diff --git a/util/sql/func.sql b/util/sql/func.sql
index 125455a2..ec109692 100644
--- a/util/sql/func.sql
+++ b/util/sql/func.sql
@@ -31,6 +31,21 @@ CREATE OR REPLACE FUNCTION strip_spoilers(t text) RETURNS text AS $$
$$ LANGUAGE sql IMMUTABLE;
+-- Assigns a score to the relevance of a substring match, intended for use in
+-- an ORDER BY clause. Exact matches are ordered first, prefix matches after
+-- that, and finally a normal substring match. Not particularly fast, but
+-- that's to be expected of naive substring searches.
+-- Pattern must be escaped for use as a LIKE pattern.
+CREATE OR REPLACE FUNCTION substr_score(str text, pattern text) RETURNS integer AS $$
+SELECT CASE
+ WHEN str ILIKE pattern THEN 0
+ WHEN str ILIKE pattern||'%' THEN 1
+ WHEN str ILIKE '%'||pattern||'%' THEN 2
+ ELSE 3
+END;
+$$ LANGUAGE SQL;
+
+
-- update_vncache(id) - updates some c_* columns in the vn table
CREATE OR REPLACE FUNCTION update_vncache(integer) RETURNS void AS $$
UPDATE vn SET
diff --git a/util/updates/update_2.26.sql b/util/updates/update_2.26.sql
index 444666f8..2d5251a6 100644
--- a/util/updates/update_2.26.sql
+++ b/util/updates/update_2.26.sql
@@ -33,3 +33,14 @@ ALTER TABLE threads_poll_votes ADD CONSTRAINT threads_poll_votes_optid_fke
-- Tagalog language
ALTER TYPE language ADD VALUE 'ta' BEFORE 'tr';
+
+
+-- Improved substring search relevance
+CREATE OR REPLACE FUNCTION substr_score(str text, pattern text) RETURNS integer AS $$
+SELECT CASE
+ WHEN str ILIKE pattern THEN 0
+ WHEN str ILIKE pattern||'%' THEN 1
+ WHEN str ILIKE '%'||pattern||'%' THEN 2
+ ELSE 3
+END;
+$$ LANGUAGE SQL;