summaryrefslogtreecommitdiff
path: root/lib/VNWeb/DB.pm
blob: b839baf3ef4ecb0dcc46a8c669d0830ddf47c5be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
package VNWeb::DB;

use v5.24;
use warnings;
use TUWF;
use SQL::Interp ':all';
use Carp 'carp';
use Exporter 'import';
use VNDB::Schema;

our @EXPORT = qw/
    sql
    sql_join sql_comma sql_and sql_array sql_func sql_fromhex sql_tohex sql_fromtime sql_totime
    enrich enrich_merge enrich_flatten
    db_entry
/;



# Test for potential SQL injection and warn about it. This will cause some
# false positives.
# The heuristic is pretty simple: Just check if there's an integer in the SQL
# statement. SQL injection through strings is likely to be caught much earlier,
# since that will generate a syntax error if the string is not properly escaped
# (and who'd put effort into escaping strings when placeholders are easier?).
sub interp_warn {
    my @r = sql_interp @_;
    carp "Possible SQL injection in '$r[0]'" if tuwf->debug && $r[0] =~ /[2-9]/; # 0 and 1 aren't interesting, "SELECT 1" is a common pattern and so is "x > 0"
    return @r;
}


# SQL::Interp wrappers around TUWF's db* methods.  These do not work with
# sql_type(). Proper integration should probably be added directly to TUWF.
sub TUWF::Object::dbExeci { shift->dbExec(interp_warn @_) }
sub TUWF::Object::dbVali  { shift->dbVal (interp_warn @_) }
sub TUWF::Object::dbRowi  { shift->dbRow (interp_warn @_) }
sub TUWF::Object::dbAlli  { shift->dbAll (interp_warn @_) }
sub TUWF::Object::dbPagei { shift->dbPage(shift, interp_warn @_) }

# Ugly workaround to ensure that db* method failures are reported at the actual caller.
$Carp::Internal{ (__PACKAGE__) }++;



# sql_* are macros for SQL::Interp use

# join(), but for sql objects.
sub sql_join {
    my $sep = shift;
    my @args = map +($sep, $_), @_;
    shift @args;
    return @args;
}

# Join multiple arguments together with a comma, for use in a SELECT or IN
# clause or function arguments.
sub sql_comma { sql_join ',', @_ }

sub sql_and   { sql_join 'AND', map sql('(', $_, ')'), @_ }

# Construct a PostgreSQL array type from the function arguments.
sub sql_array { 'ARRAY[', sql_join(',', map \$_, @_), ']' }

# Call an SQL function
sub sql_func {
    my($funcname, @args) = @_;
    sql $funcname, '(', sql_comma(@args), ')';
}

# Convert a Perl hex value into Postgres bytea
sub sql_fromhex($) {
    sql_func decode => \$_[0], "'hex'";
}

# Convert a Postgres bytea into a Perl hex value
sub sql_tohex($) {
    sql_func encode => $_[0], "'hex'";
}

# Convert a Perl time value (UNIX timestamp) into a Postgres timestamp
sub sql_fromtime($) {
    sql_func to_timestamp => \$_[0];
}

# Convert a Postgres timestamp into a Perl time value
sub sql_totime($) {
    sql "extract('epoch' from ", $_[0], ')';
}




# The enrich*() functions are based on https://dev.yorhel.nl/doc/sqlobject
# See that article for general usage information, the following is purely
# reference documentation:
#
# enrich $name, $key, $merge_col, $sql, @objects;
#
#   Add a $name field each item in @objects,
#   Its value is a (possibly empty) array of hashes with data from $sql,
#
# enrich_flatten $name, $key, $merge_col, $sql, @objects;
#
#   Add a $name field each item in @objects,
#   Its value is a (possibly empty) array of values from a single column from $sql,
#
# enrich_merge $key, $sql, @objects;
#
#   Merge all columns returned by $sql into @objects;
#
#
# Arguments:
#
#   $key is the field in @objects used in the IN clause of $sql,
#
#   $merge_col is the column name returned by $sql and compared against the
#     values of the $key field.
#     (enrich_merge() requires that the column name is equivalent to $key)
#
#   $sql is the query to be executed, can be either:
#     - A string or sql() object, in which case it should end with ' IN' so
#       that the list of identifiers can be appended to it.
#     - A subroutine, in which case the array of identifiers is given as first
#       argument. The sub should return an sql() object.
#
#   @objects is a list or array of hashrefs to be enriched.


# Helper function for the enrich functions below.
sub _enrich {
    my($merge, $key, $sql, @array) = @_;

    # 'flatten' the given array, so that you can also give arrayrefs as argument
    @array = map +(ref $_ eq 'ARRAY' ? @$_ : $_), @array;

    # Create a list of unique identifiers to fetch, do nothing if there's nothing to fetch
    my %ids = map +($_->{$key},1), @array;
    return if !keys %ids;

    # Fetch the data
    $sql = ref $sql eq 'CODE' ? $sql->([keys %ids]) : sql $sql, [keys %ids];
    my $data = tuwf->dbAlli($sql);

    # And merge
    $merge->($data, \@array);
}


sub enrich {
    my($name, $key, $merge_col, $sql, @array) = @_;
    _enrich sub {
        my($data, $array) = @_;
        my %ids = ();
        push $ids{ delete $_->{$merge_col} }->@*, $_ for @$data;
        $_->{$name} = $ids{ $_->{$key} }||[] for @$array;
    }, $key, $sql, @array;
}


sub enrich_merge {
    my($key, $sql, @array) = @_;
    _enrich sub {
        my($data, $array) = @_;
        my %ids = map +(delete($_->{$key}), $_), @$data;
        %$_ = (%$_, $ids{ $_->{$key} }->%*) for @$array;
    }, $key, $sql, @array;
}


sub enrich_flatten {
    my($name, $key, $merge_col, $sql, @array) = @_;
    _enrich sub {
        my($data, $array) = @_;
        my %ids = ();
        push $ids{ delete $_->{$merge_col} }->@*, values %$_ for @$data;
        $_->{$name} = $ids{ $_->{$key} }||[] for @$array;
    }, $key, $sql, @array;
}



# Database entry API: Intended to provide a low-level read/write interface for
# versioned database entires. The same data structure is used for reading and
# updating entries, and should support easy diffing/comparison.
# Not very convenient for general querying & searching, those still need custom
# queries.


# Hash table, something like:
# {
#   v => {
#       prefix => 'vn',
#       base => { .. 'vn_hist' schema }
#       tables => {
#           anime => { .. 'vn_anime_hist' schema }
#       },
#   }, ..
# }
my $entry_types = do {
    my $schema = VNDB::Schema::schema;
    my %types = map +($_->{dbentry_type}, { prefix => $_->{name} }), grep $_->{dbentry_type}, values %$schema;
    for my $t (values %$schema) {
        my $n = $t->{name};
        my($type) = grep $n =~ s/^$_->{prefix}_//, values %types;
        next if !$type;
        $type->{base} = $t if $n eq 'hist';
        next if $n !~ s/_hist$//;
        $type->{tables}{$n} = $t;
    }
    \%types;
};


# Returns everything for a specific entry ID. The top-level hash also includes
# the following keys:
#
#   id, chid, rev, maxrev, hidden, locked, entry_hidden, entry_locked
#
# (Ordering of arrays is unspecified)
#
# TODO:
# - Use non _hist tables if $maxrev == $rev (should be faster)
# - Combine the enrich_merge() calls into a single query.
sub db_entry {
    my($type, $id, $rev) = @_;
    my $t = $entry_types->{$type}||die;

    my $maxrev = tuwf->dbVali('SELECT MAX(rev) FROM changes WHERE type =', \$type, ' AND itemid =', \$id);
    return undef if !$maxrev;
    $rev ||= $maxrev;
    my $entry = tuwf->dbRowi(q{
        SELECT itemid AS id, id AS chid, rev AS chrev, ihid AS hidden, ilock AS locked
          FROM changes
         WHERE}, { type => $type, itemid => $id, rev => $rev }
    );
    return undef if !$entry->{id};
    $entry->{maxrev} = $maxrev;

    if($maxrev == $rev) {
        $entry->{entry_hidden} = $entry->{hidden};
        $entry->{entry_locked} = $entry->{locked};
    } else {
        my $base = $t->{base}{name} =~ s/_hist$//r;
        enrich_merge id => "SELECT id, hidden AS entry_hidden, locked AS entry_locked FROM \"$base\" WHERE id IN", $entry;
    }

    enrich_merge chid => sql(
        SELECT => sql_comma(map "\"$_->{name}\"", $t->{base}{cols}->@*),
        FROM => "\"$t->{base}{name}\"",
        'WHERE chid IN'
    ), $entry;

    while(my($name, $tbl) = each $t->{tables}->%*) {
        $entry->{$name} = tuwf->dbAlli(
            SELECT => sql_comma(map "\"$_->{name}\"", grep $_->{name} ne 'chid', $tbl->{cols}->@*),
            FROM => "\"$tbl->{name}\"",
            WHERE => { chid => $entry->{chid} }
        );
    }
    $entry
}

1;