summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sql/all.sql1
-rw-r--r--sql/func.sql104
-rw-r--r--sql/schema.sql3
-rw-r--r--sql/util.sql102
-rwxr-xr-xutil/dbdump.pl3
-rwxr-xr-xutil/devdump.pl1
6 files changed, 107 insertions, 107 deletions
diff --git a/sql/all.sql b/sql/all.sql
index 852719c2..15ae372d 100644
--- a/sql/all.sql
+++ b/sql/all.sql
@@ -1,6 +1,7 @@
-- NOTE: Make sure you're cd'ed in the vndb root directory before running this script
\set ON_ERROR_STOP 1
+\i sql/util.sql
\i sql/schema.sql
\i sql/data.sql
\i sql/func.sql
diff --git a/sql/func.sql b/sql/func.sql
index 4e658bf8..417c3d48 100644
--- a/sql/func.sql
+++ b/sql/func.sql
@@ -4,81 +4,12 @@
-- notify_* -> functions creating entries in the notifications table
-- user_* -> functions to manage users and sessions
-- update_* -> functions to update a cache
--- *_update ^ (I should probably rename these to
--- *_calc ^ the update_* scheme for consistency)
+-- *_calc ^ (same, should prolly rename to the update_* scheme for consistency)
-- I like to keep the nouns in functions singular, in contrast to the table
-- naming scheme where nouns are always plural. But I'm not very consistent
-- with that, either.
--- strip_bb_tags(text) - simple utility function to aid full-text searching
-CREATE OR REPLACE FUNCTION strip_bb_tags(t text) RETURNS text AS $$
- SELECT regexp_replace(t, '\[(?:url=[^\]]+|/?(?:spoiler|quote|raw|code|url))\]', ' ', 'gi');
-$$ LANGUAGE sql IMMUTABLE;
-
--- Wrapper around to_tsvector() and strip_bb_tags(), implemented in plpgsql and
--- with an associated cost function to make it opaque to the query planner and
--- ensure the query planner realizes that this function is _slow_.
-CREATE OR REPLACE FUNCTION bb_tsvector(t text) RETURNS tsvector AS $$
-BEGIN
- RETURN to_tsvector('english', public.strip_bb_tags(t));
-END;
-$$ LANGUAGE plpgsql IMMUTABLE COST 500;
-
--- BUG: Since this isn't a full bbcode parser, [spoiler] tags inside [raw] or [code] are still considered spoilers.
-CREATE OR REPLACE FUNCTION strip_spoilers(t text) RETURNS text AS $$
- -- The website doesn't require the [spoiler] tag to be closed, the outer replace catches that case.
- SELECT regexp_replace(regexp_replace(t, '\[spoiler\].*?\[/spoiler\]', ' ', 'ig'), '\[spoiler\].*', ' ', 'i');
-$$ LANGUAGE sql IMMUTABLE;
-
-
--- Assigns a score to the relevance of a substring match, intended for use in
--- an ORDER BY clause. Exact matches are ordered first, prefix matches after
--- that, and finally a normal substring match. Not particularly fast, but
--- that's to be expected of naive substring searches.
--- Pattern must be escaped for use as a LIKE pattern.
-CREATE OR REPLACE FUNCTION substr_score(str text, pattern text) RETURNS integer AS $$
-SELECT CASE
- WHEN str ILIKE pattern THEN 0
- WHEN str ILIKE pattern||'%' THEN 1
- WHEN str ILIKE '%'||pattern||'%' THEN 2
- ELSE 3
-END;
-$$ LANGUAGE SQL;
-
-
--- Convenient function to match the first character of a string. Second argument must be lowercase 'a'-'z' or '0'.
--- Postgres can inline and partially evaluate this function into the query plan, so it's fairly efficient.
-CREATE OR REPLACE FUNCTION match_firstchar(str text, chr text) RETURNS boolean AS $$
- SELECT CASE WHEN chr = '0'
- THEN (ascii(str) < 97 OR ascii(str) > 122) AND (ascii(str) < 65 OR ascii(str) > 90)
- ELSE ascii(str) IN(ascii(chr),ascii(upper(chr)))
- END;
-$$ LANGUAGE SQL IMMUTABLE;
-
-
--- Helper function for search normalization
-CREATE OR REPLACE FUNCTION search_norm_term(str text) RETURNS text AS $$
- SELECT regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(
- translate(lower(public.unaccent(normalize(str, NFKC))), $s$@,_-‐.~~〜∼ー῀:[]()%+!?#$`♥★☆♪†「」『』【】・<>'$s$, 'a'), -- '
- '\s+', '', 'g'),
- '&', 'and', 'g'),
- 'disc', 'disk', 'g'),
- 'gray', 'grey', 'g'),
- 'colour', 'color', 'g'),
- 'senpai', 'sempai', 'g');
-$$ LANGUAGE SQL IMMUTABLE;
-
-
-CREATE OR REPLACE FUNCTION search_gen(terms text[]) RETURNS text AS $$
- SELECT coalesce(string_agg(t, ' '), '') FROM (
- SELECT t FROM (
- SELECT public.search_norm_term(t) FROM unnest(terms) x(t)
- ) x(t) WHERE t IS NOT NULL AND t <> '' GROUP BY t ORDER BY t
- ) x(t);
-$$ LANGUAGE SQL IMMUTABLE;
-
-
CREATE OR REPLACE FUNCTION search_gen_vn(vnid vndbid) RETURNS text AS $$
SELECT coalesce(string_agg(t, ' '), '') FROM (
SELECT t FROM (
@@ -109,39 +40,6 @@ CREATE OR REPLACE FUNCTION search_gen_vn(vnid vndbid) RETURNS text AS $$
$$ LANGUAGE SQL;
-
--- Split a search query into LIKE patterns.
--- Supports double quoting for adjacent terms.
--- e.g. 'SEARCH que.ry "word here"' -> '{%search%,%query%,%wordhere%}'
---
--- Can be efficiently used as follows: c_search LIKE ALL (search_query('query here'))
-CREATE OR REPLACE FUNCTION search_query(q text) RETURNS text[] AS $$
-DECLARE
- tmp text;
- ret text[];
-BEGIN
- ret := ARRAY[]::text[];
- LOOP
- q := regexp_replace(q, '^\s+', '');
- IF q = '' THEN EXIT;
- ELSIF q ~ '^"[^"]+"' THEN
- tmp := regexp_replace(q, '^"([^"]+)".*$', '\1', '');
- q := regexp_replace(q, '^"[^"]+"', '', '');
- ELSE
- tmp := regexp_replace(q, '^([^\s]+).*$', '\1', '');
- q := regexp_replace(q, '^[^\s]+', '', '');
- END IF;
-
- tmp := '%'||search_norm_term(tmp)||'%';
- IF length(tmp) > 2 AND NOT (ARRAY[tmp] <@ ret) THEN
- ret := array_append(ret, tmp);
- END IF;
- END LOOP;
- RETURN ret;
-END;
-$$ LANGUAGE plpgsql IMMUTABLE;
-
-
-- update_vncache(id) - updates some c_* columns in the vn table
CREATE OR REPLACE FUNCTION update_vncache(vndbid) RETURNS void AS $$
UPDATE vn SET
diff --git a/sql/schema.sql b/sql/schema.sql
index 7d8dfde4..15411057 100644
--- a/sql/schema.sql
+++ b/sql/schema.sql
@@ -107,9 +107,6 @@ CREATE SEQUENCE threads_id_seq;
CREATE SEQUENCE vn_id_seq;
CREATE SEQUENCE users_id_seq;
--- Forward declaration for required functions. These are fully defined in func.sql
-CREATE FUNCTION search_gen(terms text[]) RETURNS text AS 'SELECT NULL' LANGUAGE SQL IMMUTABLE;
-
-- anime
CREATE TABLE anime (
diff --git a/sql/util.sql b/sql/util.sql
new file mode 100644
index 00000000..b8b9c670
--- /dev/null
+++ b/sql/util.sql
@@ -0,0 +1,102 @@
+-- This file is for generic utility functions that do not depend on the data schema.
+-- It should be loaded before schema.sql.
+
+
+-- strip_bb_tags(text) - simple utility function to aid full-text searching
+CREATE OR REPLACE FUNCTION strip_bb_tags(t text) RETURNS text AS $$
+ SELECT regexp_replace(t, '\[(?:url=[^\]]+|/?(?:spoiler|quote|raw|code|url))\]', ' ', 'gi');
+$$ LANGUAGE sql IMMUTABLE;
+
+-- Wrapper around to_tsvector() and strip_bb_tags(), implemented in plpgsql and
+-- with an associated cost function to make it opaque to the query planner and
+-- ensure the query planner realizes that this function is _slow_.
+CREATE OR REPLACE FUNCTION bb_tsvector(t text) RETURNS tsvector AS $$
+BEGIN
+ RETURN to_tsvector('english', public.strip_bb_tags(t));
+END;
+$$ LANGUAGE plpgsql IMMUTABLE COST 500;
+
+-- BUG: Since this isn't a full bbcode parser, [spoiler] tags inside [raw] or [code] are still considered spoilers.
+CREATE OR REPLACE FUNCTION strip_spoilers(t text) RETURNS text AS $$
+ -- The website doesn't require the [spoiler] tag to be closed, the outer replace catches that case.
+ SELECT regexp_replace(regexp_replace(t, '\[spoiler\].*?\[/spoiler\]', ' ', 'ig'), '\[spoiler\].*', ' ', 'i');
+$$ LANGUAGE sql IMMUTABLE;
+
+
+-- Assigns a score to the relevance of a substring match, intended for use in
+-- an ORDER BY clause. Exact matches are ordered first, prefix matches after
+-- that, and finally a normal substring match. Not particularly fast, but
+-- that's to be expected of naive substring searches.
+-- Pattern must be escaped for use as a LIKE pattern.
+CREATE OR REPLACE FUNCTION substr_score(str text, pattern text) RETURNS integer AS $$
+SELECT CASE
+ WHEN str ILIKE pattern THEN 0
+ WHEN str ILIKE pattern||'%' THEN 1
+ WHEN str ILIKE '%'||pattern||'%' THEN 2
+ ELSE 3
+END;
+$$ LANGUAGE SQL;
+
+
+-- Convenient function to match the first character of a string. Second argument must be lowercase 'a'-'z' or '0'.
+-- Postgres can inline and partially evaluate this function into the query plan, so it's fairly efficient.
+CREATE OR REPLACE FUNCTION match_firstchar(str text, chr text) RETURNS boolean AS $$
+ SELECT CASE WHEN chr = '0'
+ THEN (ascii(str) < 97 OR ascii(str) > 122) AND (ascii(str) < 65 OR ascii(str) > 90)
+ ELSE ascii(str) IN(ascii(chr),ascii(upper(chr)))
+ END;
+$$ LANGUAGE SQL IMMUTABLE;
+
+
+-- Helper function for search normalization
+CREATE OR REPLACE FUNCTION search_norm_term(str text) RETURNS text AS $$
+ SELECT regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(
+ translate(lower(public.unaccent(normalize(str, NFKC))), $s$@,_-‐.~~〜∼ー῀:[]()%+!?#$`♥★☆♪†「」『』【】・<>'$s$, 'a'), -- '
+ '\s+', '', 'g'),
+ '&', 'and', 'g'),
+ 'disc', 'disk', 'g'),
+ 'gray', 'grey', 'g'),
+ 'colour', 'color', 'g'),
+ 'senpai', 'sempai', 'g');
+$$ LANGUAGE SQL IMMUTABLE;
+
+
+CREATE OR REPLACE FUNCTION search_gen(terms text[]) RETURNS text AS $$
+ SELECT coalesce(string_agg(t, ' '), '') FROM (
+ SELECT t FROM (
+ SELECT public.search_norm_term(t) FROM unnest(terms) x(t)
+ ) x(t) WHERE t IS NOT NULL AND t <> '' GROUP BY t ORDER BY t
+ ) x(t);
+$$ LANGUAGE SQL IMMUTABLE;
+
+
+-- Split a search query into LIKE patterns.
+-- Supports double quoting for adjacent terms.
+-- e.g. 'SEARCH que.ry "word here"' -> '{%search%,%query%,%wordhere%}'
+--
+-- Can be efficiently used as follows: c_search LIKE ALL (search_query('query here'))
+CREATE OR REPLACE FUNCTION search_query(q text) RETURNS text[] AS $$
+DECLARE
+ tmp text;
+ ret text[];
+BEGIN
+ ret := ARRAY[]::text[];
+ LOOP
+ q := regexp_replace(q, '^\s+', '');
+ IF q = '' THEN EXIT;
+ ELSIF q ~ '^"[^"]+"' THEN
+ tmp := regexp_replace(q, '^"([^"]+)".*$', '\1', '');
+ q := regexp_replace(q, '^"[^"]+"', '', '');
+ ELSE
+ tmp := regexp_replace(q, '^([^\s]+).*$', '\1', '');
+ q := regexp_replace(q, '^[^\s]+', '', '');
+ END IF;
+
+ tmp := '%'||search_norm_term(tmp)||'%';
+ IF length(tmp) > 2 AND NOT (ARRAY[tmp] <@ ret) THEN
+ ret := array_append(ret, tmp);
+ END IF;
+ END LOOP;
+ RETURN ret;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE;
diff --git a/util/dbdump.pl b/util/dbdump.pl
index 5adf35de..f749ed72 100755
--- a/util/dbdump.pl
+++ b/util/dbdump.pl
@@ -305,6 +305,7 @@ sub export_data {
binmode($F, ":utf8");
select $F;
print "\\set ON_ERROR_STOP 1\n";
+ print "\\i sql/util.sql\n";
print "\\i sql/schema.sql\n";
# Would be nice if VNDB::Schema could list sequences, too.
my @seq = sort @{ $db->selectcol_arrayref(
@@ -312,7 +313,7 @@ sub export_data {
) };
printf "SELECT setval('%s', %d);\n", $_, $db->selectrow_array("SELECT last_value FROM \"$_\"", {}) for @seq;
for my $t (sort { $a->{name} cmp $b->{name} } values %$schema) {
- my $cols = join ',', map "\"$_->{name}\"", $t->{cols}->@*;
+ my $cols = join ',', map "\"$_->{name}\"", grep $_->{decl} !~ /\sGENERATED\s/, $t->{cols}->@*;
my $order = table_order $t->{name};
print "\nCOPY \"$t->{name}\" ($cols) FROM STDIN;\n";
$db->do("COPY (SELECT $cols FROM \"$t->{name}\" $order) TO STDOUT");
diff --git a/util/devdump.pl b/util/devdump.pl
index a36d0184..1cc54ea0 100755
--- a/util/devdump.pl
+++ b/util/devdump.pl
@@ -96,6 +96,7 @@ sub copy_entry {
print "-- This file replaces 'sql/all.sql'.\n";
print "\\set ON_ERROR_STOP 1\n";
+ print "\\i sql/util.sql\n";
print "\\i sql/schema.sql\n";
print "\\i sql/data.sql\n";
print "\\i sql/func.sql\n";