diff options
author | Yorhel <git@yorhel.nl> | 2014-03-14 10:23:13 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2014-03-14 10:23:13 +0100 |
commit | 313e9d64cd626bc6b15691d2a0804807f6d79ac1 (patch) | |
tree | ad312b873db6fba35cf112d5819ebd8ba7faa0bb | |
parent | 6fff6607d36f3c912eadc2018f6c7de06d146343 (diff) |
db.c: Ensure that /gc completes in linear time when cleaning up hashdata
This fixes http://dev.yorhel.nl/ncdc/bug/59
I'm not too happy about this fix because it adds significant disk I/O
for an index that would fit in memory just fine. There are workarounds
to make this more efficient, but they require more code. And I'm lazy.
-rw-r--r-- | src/db.c | 15 |
1 files changed, 11 insertions, 4 deletions
@@ -636,10 +636,17 @@ void db_fl_getids(void (*callback)(gint64)) { // Remove rows from the hashdata table that are not referenced from the // hashfiles table. void db_fl_purgedata() { - // Since there is no index on hashfiles(tth), one might expect this query to - // be extremely slow. Luckily sqlite is clever enough to create a temporary - // index for this query. - db_queue_push(0, "DELETE FROM hashdata WHERE NOT EXISTS(SELECT 1 FROM hashfiles WHERE tth = root)", DBQ_END); + // For small databases, sqlite is clever enough to create a temporary + // in-memory index on hashfiles(tth). But sometimes it doesn't, and then this + // query takes an hour or longer to run. To be on the safe side, explicitely + // create an index. This requires some extra disk space and makes a /gc run + // longer on average, but it should guarantee that a /gc actually finishes + // within a matter of minutes rather than hours. + db_queue_lock(); + db_queue_push_unlocked(DBF_NEXT, "CREATE UNIQUE INDEX hashfiles_tth_gc ON hashfiles (tth)", DBQ_END); + db_queue_push_unlocked(DBF_NEXT, "DELETE FROM hashdata WHERE NOT EXISTS(SELECT 1 FROM hashfiles WHERE tth = root)", DBQ_END); + db_queue_push_unlocked(0, "DROP INDEX hashfiles_tth_gc", DBQ_END); + db_queue_unlock(); } |