summaryrefslogtreecommitdiff
path: root/indexer
diff options
context:
space:
mode:
author Yorhel <git@yorhel.nl> 2016-11-05 10:22:31 +0100
committer Yorhel <git@yorhel.nl> 2016-11-05 10:22:31 +0100
commit aff68205b0f2a6fabcd4e77ddbc72eb19fcf6cdc (patch)
tree 1362053dea7111cefbf6fbe294d56ba043527771 /indexer
parent 0cab7586655c328392c2c5bee437e29728c8d8f9 (diff)
Add postgres package indexing + cli options
Diffstat (limited to 'indexer')
-rw-r--r-- indexer/Cargo.lock 207
-rw-r--r-- indexer/Cargo.toml 4
-rw-r--r-- indexer/src/main.rs 58
-rw-r--r-- indexer/src/man.rs 6
-rw-r--r-- indexer/src/pkg.rs 139
5 files changed, 376 insertions, 38 deletions
diff --git a/indexer/Cargo.lock b/indexer/Cargo.lock
index 1d0ea13..de07942 100644
--- a/indexer/Cargo.lock
+++ b/indexer/Cargo.lock
@@ -2,12 +2,14 @@
name = "indexer"
version = "0.1.0"
dependencies = [
- "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)",
+ "clap 2.17.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding 0.3.0-dev (git+https://github.com/lifthrasiir/rust-encoding)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libarchive3-sys 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "postgres 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
"ring 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -21,61 +23,102 @@ dependencies = [
]
[[package]]
-name = "encoding"
-version = "0.2.33"
+name = "ansi_term"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "bitflags"
+version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "bufstream"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "byteorder"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "clap"
+version = "2.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
+ "strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "encoding"
+version = "0.3.0-dev"
+source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
dependencies = [
- "encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
- "encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
- "encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
- "encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
- "encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding-index-japanese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)",
+ "encoding-index-korean 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)",
+ "encoding-index-simpchinese 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)",
+ "encoding-index-singlebyte 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)",
+ "encoding-index-tradchinese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)",
+ "encoding-types 0.2.0 (git+https://github.com/lifthrasiir/rust-encoding)",
]
[[package]]
name = "encoding-index-japanese"
-version = "1.20141219.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+version = "1.20141219.6"
+source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
dependencies = [
- "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
]
[[package]]
name = "encoding-index-korean"
-version = "1.20141219.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+version = "1.20141219.6"
+source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
dependencies = [
- "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
]
[[package]]
name = "encoding-index-simpchinese"
-version = "1.20141219.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+version = "1.20160120.0"
+source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
dependencies = [
- "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
]
[[package]]
name = "encoding-index-singlebyte"
-version = "1.20141219.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+version = "1.20160120.0"
+source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
dependencies = [
- "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
]
[[package]]
name = "encoding-index-tradchinese"
-version = "1.20141219.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+version = "1.20141219.6"
+source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
dependencies = [
- "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)",
]
[[package]]
+name = "encoding-types"
+version = "0.2.0"
+source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
+
+[[package]]
name = "encoding_index_tests"
-version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+version = "0.1.5"
+source = "git+https://github.com/lifthrasiir/rust-encoding#61e331b0820311572fa00a06349b0f02511e810c"
[[package]]
name = "env_logger"
@@ -87,6 +130,16 @@ dependencies = [
]
[[package]]
+name = "fallible-iterator"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "hex"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "kernel32-sys"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -120,6 +173,11 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
+name = "md5"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "memchr"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -128,11 +186,48 @@ dependencies = [
]
[[package]]
+name = "phf"
+version = "0.7.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "phf_shared 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.7.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "pkg-config"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
+name = "postgres"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "bufstream 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "fallible-iterator 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "phf 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)",
+ "postgres-protocol 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "postgres-protocol"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "fallible-iterator 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "md5 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
name = "regex"
version = "0.1.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -159,6 +254,21 @@ dependencies = [
]
[[package]]
+name = "strsim"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "term_size"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
name = "thread-id"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -176,6 +286,16 @@ dependencies = [
]
[[package]]
+name = "unicode-segmentation"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "unicode-width"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "untrusted"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -186,6 +306,11 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
+name = "vec_map"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "winapi"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -197,27 +322,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
-"checksum encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec"
-"checksum encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91"
-"checksum encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81"
-"checksum encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7"
-"checksum encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a"
-"checksum encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18"
-"checksum encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569"
+"checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6"
+"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
+"checksum bufstream 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7b48dbe2ff0e98fa2f03377d204a9637d3c9816cd431bfe05a8abbd0ea11d074"
+"checksum byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855"
+"checksum clap 2.17.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27dac76762fb56019b04aed3ccb43a770a18f80f9c2eb62ee1a18d9fb4ea2430"
+"checksum encoding 0.3.0-dev (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
+"checksum encoding-index-japanese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
+"checksum encoding-index-korean 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
+"checksum encoding-index-simpchinese 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
+"checksum encoding-index-singlebyte 1.20160120.0 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
+"checksum encoding-index-tradchinese 1.20141219.6 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
+"checksum encoding-types 0.2.0 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
+"checksum encoding_index_tests 0.1.5 (git+https://github.com/lifthrasiir/rust-encoding)" = "<none>"
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
+"checksum fallible-iterator 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "5d48ab1bc11a086628e8cc0cc2c2dc200b884ac05c4b48fb71d6036b6999ff1d"
+"checksum hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d6a22814455d41612f41161581c2883c0c6a1c41852729b17d5ed88f01e153aa"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
"checksum libarchive3-sys 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3cd3beae8f59a4c7a806523269b5392037577c150446e88d684dfa6de6031ca7"
"checksum libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "044d1360593a78f5c8e5e710beccdc24ab71d1f01bc19a29bcacdba22e8475d8"
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
+"checksum md5 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7df230903ccdffd6b3b4ec21624498ea64c912ce50297846907f0b8e1bb249dd"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
+"checksum phf 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "17896951e179a6cbed7d3519b3078ac6c03a347d3e9cf8f303c8a1a73c5a3e44"
+"checksum phf_shared 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "bb6c14aac1140c2b06b41477096f249416b17c893d56386a892ac657edfdffba"
"checksum pkg-config 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8cee804ecc7eaf201a4a207241472cc870e825206f6c031e3ee2a72fa425f2fa"
+"checksum postgres 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7ef92468927003a037e175b54320319e358886865899b37f7318837a646a9fd"
+"checksum postgres-protocol 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7e2fc3d800dacc2dd749b690ad15b9b78bc04c26c3f0525cbe163436559bc3fc"
"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f"
"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
"checksum ring 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0743ef007bcff4909b107907a410418eb7e5c6ad55b843d70b39f62bfb7112e"
+"checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e"
+"checksum term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f7f5f3f71b0040cecc71af239414c23fd3c73570f5ff54cf50e03cef637f2a0"
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
+"checksum unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b905d0fc2a1f0befd86b0e72e31d1787944efef9d38b9358a9e92a69757f7e3b"
+"checksum unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2d6722facc10989f63ee0e20a83cd4e1714a9ae11529403ac7e0afd069abc39e"
"checksum untrusted 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5d9bc0e6e73a10975d1fbff8ac3541e221181b0d8998351600fb5523de634c0d"
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
+"checksum vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cac5efe5cb0fa14ec2f84f83c701c562ee63f6dcc680861b21d65c682adfb05f"
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
diff --git a/indexer/Cargo.toml b/indexer/Cargo.toml
index f97465d..5c828ba 100644
--- a/indexer/Cargo.toml
+++ b/indexer/Cargo.toml
@@ -10,5 +10,7 @@ env_logger = "0.3.5"
lazy_static = "0.2.1"
libc = "0.2.17"
libarchive3-sys = "0.1.2"
-encoding = "0.2.33"
+encoding = { git = "https://github.com/lifthrasiir/rust-encoding", features = ["no-optimized-legacy-encoding"] }
ring = "0.5.3"
+postgres = "0.12.0"
+clap = "2.16.3"
diff --git a/indexer/src/main.rs b/indexer/src/main.rs
index bcb5ee8..5649528 100644
--- a/indexer/src/main.rs
+++ b/indexer/src/main.rs
@@ -1,17 +1,71 @@
#[macro_use] extern crate log;
#[macro_use] extern crate lazy_static;
+#[macro_use] extern crate clap;
extern crate env_logger;
extern crate regex;
extern crate libarchive3_sys;
extern crate libc;
extern crate ring;
extern crate encoding;
+extern crate postgres;
mod archive;
mod archread;
mod man;
+mod pkg;
+
+
+// Resolves a system short-name to its numeric id in the `systems` table.
+//
+// Panics if the query itself fails or if no system with that short-name exists.
+fn sysbyshort(conn: &postgres::GenericConnection, short: &str) -> i32 {
+    let rows = conn.query("SELECT id FROM systems WHERE short = $1", &[&short]).unwrap();
+    if !rows.is_empty() {
+        // First column of the first row is the requested id.
+        let id: i32 = rows.get(0).get(0);
+        return id;
+    }
+    panic!("Invalid system: {}", short);
+}
+
+// Entry point: parses the command line, sets up logging, connects to PostgreSQL and runs the
+// requested subcommand (only `pkg` is defined here).
fn main() {
- env_logger::init().unwrap();
- info!("Hello, world!");
+ // Command line definition. `-v` may be repeated to raise verbosity, `-h` carries the
+ // PostgreSQL connection string and is mandatory, and the `pkg` subcommand requires all of
+ // its options plus the package file.
+ // NOTE(review): `-h` is conventionally the short flag for help; confirm that clap does not
+ // treat this as a conflict with its generated help flag.
+ let arg = clap_app!(indexer =>
+ (about: "Manned.org man page indexer")
+ (@arg v: -v +multiple "Increase verbosity")
+ (@arg host: -h +required +takes_value "PostgreSQL connection string")
+ (@subcommand pkg =>
+ (about: "Index a single package")
+ (@arg sys: --sys +required +takes_value "System short-name")
+ (@arg cat: --cat +required +takes_value "Package category")
+ (@arg pkg: --pkg +required +takes_value "Package name")
+ (@arg ver: --ver +required +takes_value "Package version")
+ (@arg date: --date +required +takes_value "Package release date")
+ (@arg FILE: +required "Package file")
+ )
+ ).get_matches();
+
+ // Map the number of `-v` flags to a log level for this crate: none = Warn, one = Info,
+ // two = Debug, three or more = Trace. The "postgres" target stays at Info unless at least
+ // four `-v` flags were given, in which case it is raised to Trace as well.
+ let verbose = arg.occurrences_of("v");
+ env_logger::LogBuilder::new()
+ .filter(Some("indexer"), match verbose {
+ 0 => log::LogLevelFilter::Warn,
+ 1 => log::LogLevelFilter::Info,
+ 2 => log::LogLevelFilter::Debug,
+ _ => log::LogLevelFilter::Trace,
+ })
+ .filter(Some("postgres"), if verbose >= 4 { log::LogLevelFilter::Trace } else { log::LogLevelFilter::Info })
+ .init().unwrap();
+
+ // Connect without TLS. A connection failure is logged and ends the program by returning
+ // from main rather than panicking. The unwrap() on "host" relies on the argument being
+ // declared +required above.
+ let db = match postgres::Connection::connect(arg.value_of("host").unwrap(), postgres::TlsMode::None) {
+ Ok(x) => x,
+ Err(x) => { error!("Can't connect to postgres: {}", x); return },
+ };
+ debug!("Connected to database");
+
+ // `pkg` subcommand: resolve the system short-name to its id (sysbyshort() panics for an
+ // unknown system) and hand everything to pkg::pkg(). The unwrap() calls rely on all of
+ // these arguments being declared +required above.
+ if let Some(matches) = arg.subcommand_matches("pkg") {
+ pkg::pkg(&db, pkg::PkgOpt {
+ sys: sysbyshort(&db, matches.value_of("sys").unwrap()),
+ cat: matches.value_of("cat").unwrap(),
+ pkg: matches.value_of("pkg").unwrap(),
+ ver: matches.value_of("ver").unwrap(),
+ date: matches.value_of("date").unwrap(),
+ file: matches.value_of("FILE").unwrap()
+ });
+ }
}
diff --git a/indexer/src/man.rs b/indexer/src/man.rs
index 56a45b3..9bcb2bf 100644
--- a/indexer/src/man.rs
+++ b/indexer/src/man.rs
@@ -18,7 +18,7 @@ const MIN_MAN_SIZE: u64 = 9;
// Checks a path for a man page candidate. Returns None if it doesn't seem like a man page
// location, otherwise Some((manPageName, Section, Locale)).
-fn parse_path(path: &str) -> Option<(&str, &str, &str)> {
+pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> {
// Roughly: man[/locale]/man1/manpage.section[.compression]+
lazy_static! {
static ref RE: Regex = Regex::new(r"(?x)
@@ -112,8 +112,8 @@ fn codec_from_tag(data: &Vec<u8>) -> Option<EncodingRef> {
// latin-1 isn't in the whatwg spec under that name
"latin-1" => Some(all::WINDOWS_1252),
- // Waaaaaaaaah we can't decode this :(
- "armscii-8" => None,
+ // armscii isn't in the whatwg spec at all
+ "armscii-8" => Some(all::ARMSCII_8),
// Anything else should be found by its whatwg label.
x => match encoding_from_whatwg_label(x) {
diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs
new file mode 100644
index 0000000..a2c0c73
--- /dev/null
+++ b/indexer/src/pkg.rs
@@ -0,0 +1,139 @@
+use std;
+use std::io::Read;
+use postgres;
+
+use archive;
+use archread;
+use man;
+use archive::Archive;
+
+// Description of a single package to index. All string fields are borrowed from the caller.
+pub struct PkgOpt<'a> {
+ // Numeric system id; stored in packages.system.
+ pub sys: i32,
+ // Package category; stored in packages.category.
+ pub cat: &'a str,
+ // Package name; stored in packages.name.
+ pub pkg: &'a str,
+ // Package version; stored in package_versions.version.
+ pub ver: &'a str,
+ // Release date as text; cast to a date by the database when stored in
+ // package_versions.released.
+ pub date: &'a str,
+ // Path of the package file on the local file system (opened with std::fs::File::open).
+ pub file: &'a str
+}
+
+
+// Registers the package and this specific version of it in the database.
+//
+// Returns the id of the newly inserted package_versions row, or None (after logging the
+// error) when either INSERT fails.
+fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i32> {
+    // A plain DO NOTHING would leave RETURNING empty for an already known package, so a no-op
+    // DO UPDATE is used to always get the package id back.
+    let pkg_sql = "INSERT INTO packages (system, category, name) VALUES($1, $2, $3)
+ ON CONFLICT ON CONSTRAINT packages_system_name_category_key DO UPDATE SET name=$3 RETURNING id";
+    let pkgid: i32 = match tr.query(pkg_sql, &[&opt.sys, &opt.cat, &opt.pkg]) {
+        Ok(rows) => rows.get(0).get(0),
+        Err(e) => {
+            error!("Can't insert package in database: {}", e);
+            return None;
+        },
+    };
+
+    // TODO: option to overwrite an existing package version
+    let ver_sql = "INSERT INTO package_versions (package, version, released) VALUES($1, $2, $3::text::date) RETURNING id";
+    let verid: i32 = match tr.query(ver_sql, &[&pkgid, &opt.ver, &opt.date]) {
+        Ok(rows) => rows.get(0).get(0),
+        Err(e) => {
+            error!("Can't insert package version in database: {}", e);
+            return None;
+        },
+    };
+
+    trace!("Package pkgid {} verid {}", pkgid, verid);
+    Some(verid)
+}
+
+
+// Inserts one row into the `man` table for the man page stored at `path` in package version
+// `verid`, referring to the contents identified by `hash`.
+//
+// Failures are logged and otherwise ignored so that the remaining man pages of the package can
+// still be indexed. To make that actually work the INSERT runs inside a nested transaction
+// (a savepoint when `tr` is itself a transaction): PostgreSQL aborts the whole enclosing
+// transaction as soon as one statement fails, so without the savepoint every later statement
+// for this package would fail with "current transaction is aborted".
+//
+// Panics if man::parse_path() does not recognise `path`.
+// NOTE(review): presumably callers only pass paths already accepted as man page locations;
+// confirm against archread.
+fn insert_man_row(tr: &postgres::GenericConnection, verid: i32, path: &str, hash: &[u8]) {
+    // TODO: Store 'encoding' in the database
+    let (name, sect, locale) = man::parse_path(path).unwrap();
+
+    // Nested transactions roll back on finish() unless set_commit() has been called.
+    let sp = match tr.transaction() {
+        Ok(x) => x,
+        Err(e) => { error!("Can't create savepoint for verid {} fn {}: {}", verid, path, e); return },
+    };
+
+    match sp.execute(
+        "INSERT INTO man (package, name, filename, locale, hash, section) VALUES ($1, $2, '/'||$3, $4, $5, $6)",
+        &[&verid, &name, &path, &locale, &hash, &sect]
+    ) {
+        Ok(_) => sp.set_commit(),
+        // I think this can only happen if archread gives us the same file twice, which really
+        // shouldn't happen. But I'd rather continue with an error logged than panic.
+        Err(e) => error!("Can't insert verid {} fn {}: {}", verid, path, e),
+    }
+
+    // Releases the savepoint on success, rolls back to it on failure.
+    if let Err(e) = sp.finish() {
+        error!("Can't finish savepoint for verid {} fn {}: {}", verid, path, e);
+    }
+}
+
+
+// Decodes one man page read from `ent` and stores it: the decoded contents go into `contents`
+// (keyed by hash, skipped when already present) and every path in `paths` gets its own `man`
+// row pointing at that hash.
+//
+// A decoding failure is logged and the man page is skipped. A failure to insert the contents
+// panics.
+fn insert_man(tr: &postgres::GenericConnection, verid: i32, paths: &[&str], ent: &mut Read) {
+    let (digest, encoding, content) = match man::decode(paths, ent) {
+        Ok(decoded) => decoded,
+        Err(e) => {
+            error!("Error decoding {}: {}", paths[0], e);
+            return;
+        },
+    };
+    let hash: &[u8] = digest.as_ref();
+
+    // TODO: Overwrite entry if the contents are different? It's possible that earlier decoding
+    // implementations didn't properly detect the encoding. (On the other hand, due to differences
+    // in filenames it's also possible that THIS decoding step went wrong. Ugh)
+    tr.execute(
+        "INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO NOTHING",
+        &[&hash, &content]
+    ).unwrap();
+
+    for path in paths {
+        insert_man_row(tr, verid, path, hash);
+        debug!("Inserted man page: {} ({})", path, encoding);
+    }
+}
+
+
+// Indexes a link `src` -> `dest` by giving `src` its own `man` row that shares the hash of the
+// already indexed `dest`. Logs an error and does nothing when `dest` has not been indexed for
+// this package version. Panics if the lookup query itself fails.
+fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &str) {
+    let rows = tr.query("SELECT hash FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap();
+    if !rows.is_empty() {
+        let target_hash: Vec<u8> = rows.get(0).get(0);
+        insert_man_row(tr, verid, src, &target_hash);
+        debug!("Inserted man link: {} -> {}", src, dest);
+    } else {
+        /* Can happen if man::decode() failed previously. */
+        error!("Link to unindexed man page: {} -> {}", src, dest);
+    }
+}
+
+
+// Opens the package file at `file` as an archive and passes the result of
+// Archive::open_archive() to `cb`, returning whatever `cb` returns.
+//
+// Errors from opening the file or the archive are returned to the caller without `cb` being
+// called.
+fn with_pkg<T,F>(file: &str, cb: F) -> std::io::Result<T>
+ where F: FnOnce(Option<archive::ArchiveEntry>) -> std::io::Result<T>
+{
+ // TODO: Support streaming from URLs
+ // TODO: How does .deb support fit into this? (Or anything else with metadata)
+ // `f` stays alive until after `cb` has run.
+ // NOTE(review): presumably the archive entry reads from `f`; confirm in archive.rs.
+ let mut f = try!(std::fs::File::open(file));
+ let ent = try!(Archive::open_archive(&mut f));
+ cb(ent)
+}
+
+
+// Reads the package file named by `opt.file` and indexes its man pages and links under
+// package version `verid`.
+//
+// Returns an error only when reading the package fails; problems with individual man pages
+// are handled (logged) inside insert_man() / insert_link() and do not stop the indexing.
+fn index_pkg(tr: &postgres::GenericConnection, opt: &PkgOpt, verid: i32) -> std::io::Result<()> {
+ // Callback invoked by archread for each man page found in the archive.
+ let indexfunc = |paths: &[&str], ent: &mut archive::ArchiveEntry| {
+ insert_man(tr, verid, paths, ent);
+ Ok(()) /* Don't propagate errors, continue handling other man pages */
+ };
+
+ // First pass: read the archive, indexing every path accepted by man::ismanpath, then
+ // index the links via insert_link(). The callback is only borrowed here because it is
+ // needed again below.
+ // NOTE(review): presumably links() returns Some(..) when some link targets still have to
+ // be read from the archive; confirm in archread.
+ let missed = try!(
+ with_pkg(opt.file, |ent| { archread::FileList::read(ent, man::ismanpath, &indexfunc) })
+ ).links(|src, dest| { insert_link(tr, verid, src, dest) });
+
+ // Second pass over the same file for whatever the first pass reported as missed.
+ if let Some(missed) = missed {
+ warn!("Some links were missed, reading package again");
+ try!(with_pkg(opt.file, |ent| { missed.read(ent, indexfunc) }))
+ }
+ Ok(())
+}
+
+
+// Indexes the single package described by `opt` inside one database transaction.
+//
+// The transaction is committed only when the whole package was read successfully; any failure
+// to register the package or to read the package file is logged and results in a rollback.
+// Panics if the transaction cannot be started.
+pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) {
+    info!("Handling pkg: {} / {} / {} - {} @ {} in {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file);
+
+    // Roll back unless explicitly switched to commit further down.
+    let tr = conn.transaction().unwrap();
+    tr.set_rollback();
+
+    // insert_pkg() has already logged the reason when it returns None.
+    let verid = if let Some(id) = insert_pkg(&tr, &opt) { id } else { return };
+
+    if let Err(e) = index_pkg(&tr, &opt, verid) {
+        error!("Error reading package: {}", e);
+    } else {
+        tr.set_commit();
+    }
+
+    if let Err(e) = tr.finish() {
+        error!("Error finishing transaction: {}", e);
+    }
+}