/* src/share/db.c */
/* Copyright (c) 2012-2013 Yoran Heling

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be included
  in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include <global.h>
#include <share/local.h>


/* Lookup table from pathids to share_path_t pointers. Used as a quick lookup
 * table when loading all files or when batch-removing identifiers from the
 * database.
 *
 * The table is created by pathids_create() and released again by
 * pathids_destroy(); the global pointer is NULL whenever no table exists.
 *
 * TODO: It's possible to avoid conversion from the blobs to native byte order
 * if the keys are already in big-endian byte order, thus speeding up lookups a
 * bit.
 *
 * TODO: kh_int_hash_func() seems like a terrible hash function for the kind of
 * integers being used here. Need to look for something better. */
KHASH_INIT(pathids, uint32_t, share_path_t *, 1, kh_int_hash_func, kh_int_hash_equal)
static khash_t(pathids) *pathids = NULL;


static void pathids_create() {
	pathids = kh_init(pathids);
	size_t i,j;
	for(i=0; i<share_conf_shares.n; i++) {
		share_t *s = share_conf_shares.a[i];
		for(j=0; j<s->c.paths.n; j++) {
			share_path_t *p = s->c.paths.a[j];
			int r;
			khiter_t k = kh_put(pathids, pathids, share_conf_pathid(p), &r);
			assert(r != 0);
			kh_value(pathids, k) = p;
		}
	}
}


/* Release the lookup table built by pathids_create() and reset the global
 * pointer to NULL. */
static void pathids_destroy() {
	kh_destroy(pathids, pathids);
	pathids = NULL;
}




/* Decode a pathid from the first four bytes of 'blob', which hold the value in
 * big-endian (most significant byte first) order. */
static inline uint32_t blob2pathid(const uint8_t *blob) {
	uint32_t id = 0;
	int i;
	for(i=0; i<4; i++)
		id = (id << 8) | (uint32_t)blob[i];
	return id;
}


/* Encode 'pathid' into the four bytes at 'blob' in big-endian (most
 * significant byte first) order. Inverse of blob2pathid(). */
static inline void pathid2blob(uint32_t pathid, uint8_t *blob) {
	int i;
	/* Fill from the least significant byte backwards. */
	for(i=3; i>=0; i--) {
		blob[i] = pathid & 0xff;
		pathid >>= 8;
	}
}


/* Test whether 'pathid' occurs in a packed pathid list.
 *
 *   paths     Concatenated 4-byte big-endian pathids; may be NULL when
 *             pathslen is 0.
 *   pathslen  Length of 'paths' in bytes.
 *   pathid    Identifier to look for.
 *
 * Returns true when one of the complete 4-byte entries equals 'pathid'.
 *
 * Only complete entries are inspected: if pathslen is not a multiple of four
 * (e.g. a corrupted blob coming straight from the database), the trailing
 * partial entry is ignored rather than read past the end of the buffer. */
static bool pathid_exists(const uint8_t *paths, size_t pathslen, uint32_t pathid) {
	size_t entries = pathslen / 4;
	size_t i;
	for(i=0; i<entries; i++)
		if(blob2pathid(paths + i*4) == pathid)
			return true;
	return false;
}


/* SQL function pathid_add(pathids, pathid).
 *
 *   args[0]  Blob holding a packed list of 4-byte big-endian pathids. A NULL
 *            or empty value is treated as an empty list.
 *   args[1]  Integer pathid to add.
 *
 * The result is args[0] unchanged when the pathid is already present,
 * otherwise a new blob consisting of the old list with the pathid appended.
 * An out-of-memory condition is reported to SQLite as an error instead of
 * dereferencing a NULL pointer. */
static void sql_pathid_add(sqlite3_context *ctx, int narg, sqlite3_value **args) {
	assert(narg == 2);

	const uint8_t *paths = sqlite3_value_blob(args[0]);
	size_t pathslen = sqlite3_value_bytes(args[0]);
	uint32_t pathid = sqlite3_value_int64(args[1]);

	/* No data pointer means there is nothing to search or copy. */
	if(!paths)
		pathslen = 0;

	if(pathid_exists(paths, pathslen, pathid)) {
		sqlite3_result_value(ctx, args[0]);
		return;
	}

	uint8_t *newpaths = malloc(pathslen+4);
	if(!newpaths) {
		sqlite3_result_error_nomem(ctx);
		return;
	}
	/* memcpy() with a NULL source is undefined even for a zero length. */
	if(pathslen > 0)
		memcpy(newpaths, paths, pathslen);
	pathid2blob(pathid, newpaths+pathslen);
	/* SQLite takes ownership of newpaths and releases it with free(). */
	sqlite3_result_blob(ctx, newpaths, pathslen+4, free);
}


/* sqlasync_custom() callback (scheduled from share_db_load()): registers the
 * two-argument SQL function pathid_add(), implemented by sql_pathid_add(), on
 * the given connection and then posts a SQLITE_DONE result on 'q'.
 * The sql, val_num and val arguments are not used. The return value of
 * sqlite3_create_function() is not inspected. */
static void share_db_createfuncs(sqlasync_t *sql, sqlite3 *db, sqlasync_queue_t *q, int val_num, sqlasync_value_t *val) {
	sqlite3_create_function(
		db, "pathid_add", 2, SQLITE_ANY,
		NULL,            /* no user data */
		sql_pathid_add,  /* scalar function implementation */
		NULL, NULL       /* not an aggregate */
	);

	sqlasync_queue_result(q, sqlasync_result_create(SQLITE_DONE, 1, 0));
}




/* Derive the 64-bit database row id for a file from the SHA-256 digest of its
 * path. Similar to hub/users.c: hub_users_genid(). */
static sqlite_int64 path2rowid(const char *path) {
	/* Digest bytes that make up the id, most significant byte first. */
	static const uint8_t picked[8] = { 3, 31, 18, 27, 8, 29, 26, 15 };
	uint8_t digest[32];
	uint64_t bits = 0;
	size_t i;

	crypt_sha256(path, strlen(path), digest);
	for(i=0; i<sizeof(picked); i++)
		bits = (bits << 8) | (uint64_t)digest[picked[i]];

	/* One bit is dropped so that the value always fits in the signed result
	 * type; converting an out-of-range value would not be portable. */
	sqlite_int64 v = bits >> 1;

	/* To still cover the full 64-bit range, a digest bit that has not been
	 * used above decides the sign, mapping v to either v or -v-1. */
	if(digest[9] & 1)
		return -v - 1;
	return v;
}


/* Request state for one share_db_exists() lookup. Allocated by
 * share_db_exists() and freed by share_db_exists_res() once the callback has
 * been invoked. */
typedef struct {
	sqlite_int64 rowid; /* Row id of the file, as computed by path2rowid() */
	time_t lastmod;     /* Caller-supplied modification time, compared against the stored lastmodified */
	uint32_t pathid;    /* Pathid that must be present in the row's pathids blob */
	uint64_t size;      /* Caller-supplied file size, compared against the stored size */
	share_db_exists_cb cb; /* Invoked as cb(data, tth); tth is NULL when there is no usable row */
	void *data;         /* Opaque pointer passed through to cb */
} share_db_exists_t;


/* Result handler for the query issued by share_db_exists().
 *
 * Invokes the request's callback with the stored TTH root when the database
 * has a well-formed row whose size equals the requested size and whose
 * lastmodified is not older than the requested one; otherwise the callback
 * gets NULL so that the file is rehashed. If the row is usable but lacks the
 * requested pathid, an UPDATE adding it is queued. The result, the queue and
 * the request object are released before returning. */
static void share_db_exists_res(sqlasync_queue_t *q, void *data) {
	share_db_exists_t *req = data;
	sqlasync_result_t *res = db_get(q, "Getting sharefiles info for rowid=%"PRId64, (int64_t)req->rowid);
	const char *tth = NULL;

	if(res->result == SQLITE_ROW) {
		/* Columns: tth_root, size, lastmodified, pathids */
		if(res->col[0].type == SQLITE_BLOB && res->col[0].val.ptr && res->col[0].length == 24
				&& res->col[1].type == SQLITE_INTEGER && res->col[1].val.i64 >= 0
				&& res->col[2].type == SQLITE_INTEGER
				&& res->col[3].type == SQLITE_BLOB && (res->col[3].length % 4) == 0) {

			/* A mismatch in size or a newer file on disk means the stored
			 * hash cannot be trusted; tth then stays NULL. */
			if(req->size == (uint64_t)res->col[1].val.i64 && !(req->lastmod > res->col[2].val.i64)) {
				tth = res->col[0].val.ptr;

				/* Make sure the row is associated with this pathid. */
				if(!pathid_exists(res->col[3].val.ptr, res->col[3].length, req->pathid))
					sqlasync_sql(db_sql, NULL, SQLASYNC_STATIC,
						"UPDATE sharefiles SET pathids = pathid_add(pathids, ?) WHERE id = ?",
						2, sqlasync_int(req->pathid), sqlasync_int(req->rowid));
			}
		}
	}

	/* tth points into the result, so the callback must run before it is freed. */
	req->cb(req->data, tth);
	sqlasync_result_free(res);
	sqlasync_queue_destroy(q);
	free(req);
}


/* Asynchronously look up whether the database already has hash information
 * for 'path'. The row is selected by path2rowid(path); share_db_exists_res()
 * processes the result and eventually calls cb(data, tth), with tth == NULL
 * when no matching, up-to-date entry was found.
 *
 * TODO: Expose share_db_exists_t object and let the caller manage it? Would
 * avoid a separately malloced object... */
void share_db_exists(const char *path, time_t lastmod, uint64_t size, uint32_t pathid,
		share_db_exists_cb cb, void *data) {
	/* Freed by share_db_exists_res(). */
	share_db_exists_t *req = malloc(sizeof(*req));

	req->cb = cb;
	req->data = data;
	req->size = size;
	req->lastmod = lastmod;
	req->pathid = pathid;
	req->rowid = path2rowid(path);

	sqlasync_sql(db_sql, sqlasync_queue_async(db_wakeup, 1, share_db_exists_res, req),
		SQLASYNC_STATIC, "SELECT tth_root, size, lastmodified, pathids FROM sharefiles WHERE id = ?",
		1, sqlasync_int(req->rowid)
	);
}


/* Queue the database writes for a freshly hashed file.
 *
 *   path         Filesystem path of the file; its row id is path2rowid(path).
 *   lastmod      Modification time to store.
 *   size         File size to store.
 *   pathid       Pathid to add to the row's pathids list.
 *   tth_root     24-byte TTH root.
 *   tthl_data    TTH leaf data of tthl_length bytes, passed with
 *                SQLASYNC_FREE (presumably ownership moves to sqlasync).
 *
 * Two statements are queued while holding the sqlasync lock: one storing the
 * hash data (ignored if a row with that id exists) and one creating or
 * replacing the sharefiles row. The replacement keeps any pathids the old row
 * had, because the pathid_add() sub-select is evaluated against the existing
 * row before it is replaced. */
void share_db_addfile(const char *path, time_t lastmod, uint64_t size, uint32_t pathid,
		const char *tth_root, char *tthl_data, size_t tthl_length) {
	sqlite_int64 rowid = path2rowid(path);
	sqlasync_lock(db_sql);
	sqlasync_sql_unlocked(db_sql, NULL, SQLASYNC_STATIC|SQLASYNC_NEXT,
		"INSERT OR IGNORE INTO hashdata (id, tth_root, tth_leaves) VALUES (?, ?, ?)",
		3, sqlasync_int(rowid),
		sqlasync_blob(SQLASYNC_COPY, 24, tth_root),
		sqlasync_blob(SQLASYNC_FREE, tthl_length, tthl_data)
	);
	/* SQLite has no "INSERT OR UPDATE"; the conflict clause that overwrites
	 * an existing row is "INSERT OR REPLACE".
	 *
	 * NOTE(review): tth_root is a const pointer yet is passed with
	 * SQLASYNC_FREE below - confirm that callers really hand over ownership
	 * of that buffer. */
	sqlasync_sql_unlocked(db_sql, NULL, SQLASYNC_STATIC,
		"INSERT OR REPLACE INTO sharefiles (id, path, pathids, size, lastmodified, tth_root)"
		" VALUES (?, ?, pathid_add((SELECT pathids FROM sharefiles WHERE id = ?), ?), ?, ?, ?)",
		7, sqlasync_int(rowid), sqlasync_text(SQLASYNC_COPY, path),
		sqlasync_int(rowid), sqlasync_int(pathid),
		sqlasync_int(size), sqlasync_int(lastmod),
		sqlasync_blob(SQLASYNC_FREE, 24, tth_root)
	);
	sqlasync_unlock(db_sql);
}




/* Insert one file into the in-memory file list of the share that owns path
 * object 'p'.
 *
 *   p        Configured share path the file belongs to.
 *   path     Filesystem directory containing the file (no file name).
 *   name     File name.
 *   size     File size.
 *   lastmod  Modification time.
 *   tth      24-byte TTH root, copied into the new entry.
 *   buf      Scratch string buffers; buf[0] and buf[1] are used here.
 *
 * Files whose directory is not located under p's FilesystemPath are skipped
 * with a warning, as are files whose name collides with an existing entry. */
static void share_db_load_filepath(share_path_t *p, char *path, const char *name, uint64_t size, time_t lastmod, const char *tth, kstring_t *buf) {
	/* Removing the FilesystemPath from the file path will give the relative
	 * path of the file within the configured share. Prefixing that with the
	 * VirtualPath will give the full virtual path of the file within the
	 * configured share. For example:
	 *
	 *   path = /share/Some
	 *   p.FilesystemPath = /share       -> relative = Some
	 *   p.VirtualPath = /MySharedFiles  -> virtual  = /MySharedFiles/Some
	 *
	 * This only works when symlinks aren't resolved. Which we don't do.
	 */
	size_t fslen = strlen(p->props.FilesystemPath);

	/* Verify that path is indeed prefixed with the FilesystemPath. This really
	 * shouldn't fail unless someone manually modified the database.
	 *
	 * Besides sharing the first fslen bytes, the prefix has to end on a path
	 * component boundary: the next byte of path is either '/' (file in a
	 * subdirectory) or the terminating NUL (file directly in the share root).
	 * path[fslen] is only read after strncmp() has matched fslen bytes, so it
	 * lies within the string. */
	if(strncmp(path, p->props.FilesystemPath, fslen) != 0
			|| (fslen != 1 && path[fslen] != '/' && path[fslen] != '\0')) {
		ywarn("'%s' is not a prefix of '%s', ignoring.", p->props.FilesystemPath, path);
		return;
	}
	if(fslen > 1)
		path = path + fslen;
	/* path is now relative to the configured share path; it is either empty
	 * or has a '/' prefix */

	share_fl_t *fl = share_fl_create(false, name, buf);
	share_fl_setsize(fl, size);
	fl->lastmod = lastmod;
	fl->pathid = p->id;
	memcpy(fl->tth, tth, 24);

	/* First descend into the virtual root of this share path... */
	share_fl_t *dir = share_fl_getdir(p->s->root, p->props.VirtualPath, p->vpath_fold, buf+1);

	/* ...then into the directory relative to it, using the case-folded form
	 * of the relative path. */
	buf->l = 0;
	casestr_fold(path, buf);
	dir = share_fl_getdir(dir, path, buf->s, buf+1);

	share_fl_t *dup;
	if((dup = share_fl_insert(dir, fl)) != NULL) {
		/* We shouldn't have added the item to the database in the first place
		 * if it was going to be hidden by duplicate filename detection. But if
		 * it does happen, let's at least report it and free the item to
		 * prevent memory leakage. */
		buf[0].l = buf[1].l = 0;
		casestr_orig(share_fl_name(dup), buf);
		casestr_orig(share_fl_name(fl), buf+1);
		ywarn("Duplicate filenames in %s%s: %s vs. %s", p->props.FilesystemPath, path, buf[0].s, buf[1].s);
		free(fl);
	}
}




/* Process one row of the sharefiles query (columns: path, pathids, size,
 * lastmodified, tth_root). Rows with missing or malformed columns are silently
 * skipped. For every pathid in the row that is present in the global lookup
 * table, the file is added to the file list via share_db_load_filepath().
 * buf[2] receives the directory part of the path; buf[0] and buf[1] are passed
 * on as scratch space. */
static void share_db_load_file(sqlite3_stmt *st, kstring_t *buf) {
	const char *fspath = (const char *)sqlite3_column_text(st, 0);
	const uint8_t *paths = sqlite3_column_blob(st, 1);
	size_t pathslen = sqlite3_column_bytes(st, 1);

	/* Non-integer size/lastmodified columns are mapped to -1 (= invalid). */
	int64_t size = -1;
	if(sqlite3_column_type(st, 2) == SQLITE_INTEGER)
		size = sqlite3_column_int64(st, 2);
	int64_t lastmod = -1;
	if(sqlite3_column_type(st, 3) == SQLITE_INTEGER)
		lastmod = sqlite3_column_int64(st, 3);

	const char *tth = sqlite3_column_blob(st, 4);

	/* Validation: absolute path, whole number of 4-byte pathids, sane
	 * numbers and a 24-byte TTH root. */
	if(!fspath || *fspath != '/')
		return;
	if(!paths || (pathslen % 4) != 0)
		return;
	if(size < 0 || lastmod < 0)
		return;
	if(!tth || sqlite3_column_bytes(st, 4) != 24)
		return;

	/* Split at the last '/': buf[2] gets the directory ("/" for files in the
	 * filesystem root), name points just behind the slash. */
	char *name = strrchr(fspath, '/');
	buf[2].l = 0;
	kputsn(fspath, fspath == name ? 1 : name-fspath, buf+2);
	name++;

	const uint8_t *entry;
	const uint8_t *end = paths + pathslen;
	for(entry=paths; entry<end; entry+=4) {
		khiter_t k = kh_get(pathids, pathids, blob2pathid(entry));
		/* Only pathids present in the lookup table are loaded. */
		if(k != kh_end(pathids))
			share_db_load_filepath(kh_value(pathids, k), buf[2].s, name, size, lastmod, tth, buf);
	}
}


/* The file list is loaded using sqlasync_custom(), because passing back
 * thousands or millions of query results over an asynchronous queue adds a
 * noticeable overhead.  The share_fl_t structures are now generated in the
 * database thread.  There is no need for locks in this case; The main thread
 * is waiting for this function to complete before it will continue with
 * further initialization.
 *
 * Every row of sharefiles is handed to share_db_load_file(). A single result
 * is posted on 'q' afterwards: SQLITE_DONE without columns on success, or the
 * SQLite error code with the error message as its only column when preparing
 * or stepping the statement failed. The sql, val_num and val arguments are
 * not used. */
static void share_db_load_sql(sqlasync_t *sql, sqlite3 *db, sqlasync_queue_t *q, int val_num, sqlasync_value_t *val) {
	sqlite3_stmt *st = NULL;
	kstring_t buf[3] = {}; /* Three string buffers for use with share_db_load_file() */

	/* Only step the statement if it could be prepared; otherwise keep the
	 * prepare error code so that it gets reported below. */
	int r = sqlite3_prepare_v2(db, "SELECT path, pathids, size, lastmodified, tth_root FROM sharefiles", -1, &st, NULL);
	if(r == SQLITE_OK) {
		while((r = sqlite3_step(st)) == SQLITE_ROW)
			share_db_load_file(st, buf);
	}

	/* Both calls are harmless no-ops when st is NULL. */
	sqlite3_reset(st);
	sqlite3_finalize(st);
	free(buf[0].s);
	free(buf[1].s);
	free(buf[2].s);

	sqlasync_result_t *res = sqlasync_result_create(r, 1, r != SQLITE_DONE);
	if(r != SQLITE_DONE)
		res->col[0] = sqlasync_text(SQLASYNC_COPY, sqlite3_errmsg(db));
	sqlasync_queue_result(q, res);
}


/* Load the complete file list from the database. Schedules registration of
 * the SQL helper functions, builds the temporary pathid lookup table, runs
 * share_db_load_sql() through a synchronous queue and waits for its result,
 * logs how long loading took and finally drops the lookup table again. */
void share_db_load() {
	sqlasync_custom(db_sql, NULL, share_db_createfuncs, 0);

	pathids_create();
	ydebug("Loading file list");

	double started = ev_time();

	sqlasync_queue_t *queue = sqlasync_custom(db_sql, sqlasync_queue_sync(), share_db_load_sql, 0);
	/* Only completion matters here; the result itself is discarded. */
	sqlasync_result_free(db_get(queue, "loading sharefiles"));
	sqlasync_queue_destroy(queue);

	yinfo("File list loaded in %.3f seconds", ev_time()-started);

	pathids_destroy();
}


/* vim: set noet sw=4 ts=4: */