db.c: Ensure that /gc completes in linear time when cleaning up hashdata

This fixes http://dev.yorhel.nl/ncdc/bug/59 I'm not too happy about this fix because it adds significant disk I/O for an index that would fit in memory just fine. There are workarounds to make this more efficient, but they require more code. And I'm lazy.
author: Yorhel <git@yorhel.nl> 2014-03-14 10:23:13 +0100
committer: Yorhel <git@yorhel.nl> 2014-03-14 10:23:13 +0100
commit: 313e9d64cd626bc6b15691d2a0804807f6d79ac1 (patch)
tree: ad312b873db6fba35cf112d5819ebd8ba7faa0bb
parent: 6fff6607d36f3c912eadc2018f6c7de06d146343 (diff)
1 files changed, 11 insertions, 4 deletions
diff --git a/src/db.c b/src/db.c
index a4d98ce..017f5fa 100644
--- a/src/db.c
+++ b/src/db.c
@@ -636,10 +636,17 @@ void db_fl_getids(void (*callback)(gint64)) {
 // Remove rows from the hashdata table that are not referenced from the
 // hashfiles table.
 void db_fl_purgedata() {
-  // Since there is no index on hashfiles(tth), one might expect this query to
-  // be extremely slow. Luckily sqlite is clever enough to create a temporary
-  // index for this query.
-  db_queue_push(0, "DELETE FROM hashdata WHERE NOT EXISTS(SELECT 1 FROM hashfiles WHERE tth = root)", DBQ_END);
+  // For small databases, sqlite is clever enough to create a temporary
+  // in-memory index on hashfiles(tth). But sometimes it doesn't, and then this
+  // query takes an hour or longer to run. To be on the safe side, explicitely
+  // create an index. This requires some extra disk space and makes a /gc run
+  // longer on average, but it should guarantee that a /gc actually finishes
+  // within a matter of minutes rather than hours.
+  db_queue_lock();
+  db_queue_push_unlocked(DBF_NEXT, "CREATE UNIQUE INDEX hashfiles_tth_gc ON hashfiles (tth)", DBQ_END);
+  db_queue_push_unlocked(DBF_NEXT, "DELETE FROM hashdata WHERE NOT EXISTS(SELECT 1 FROM hashfiles WHERE tth = root)", DBQ_END);
+  db_queue_push_unlocked(0, "DROP INDEX hashfiles_tth_gc", DBQ_END);
+  db_queue_unlock();
 }
author	Yorhel <git@yorhel.nl>	2014-03-14 10:23:13 +0100
committer	Yorhel <git@yorhel.nl>	2014-03-14 10:23:13 +0100
commit	313e9d64cd626bc6b15691d2a0804807f6d79ac1 (patch)
tree	ad312b873db6fba35cf112d5819ebd8ba7faa0bb
parent	6fff6607d36f3c912eadc2018f6c7de06d146343 (diff)