summary | refs | log | tree | commit | diff
path: root/indexer
diff options
context:
space:
mode:
author Yorhel <git@yorhel.nl> 2016-11-09 18:41:53 +0100
committer Yorhel <git@yorhel.nl> 2016-11-09 18:41:53 +0100
commit 20141aa98027a33da8322292e0a1d93ae82a0c53 (patch)
tree 3da9b442d7028a30ac0f758f34232395ee259e55 /indexer
parent 7d2abfb3a4d59297ff73dbf2dc67220edb8257b5 (diff)
indexer: Improve charset detection + lower file cache time
Diffstat (limited to 'indexer')
-rw-r--r-- indexer/src/man.rs 8
-rw-r--r-- indexer/src/open.rs 2
2 files changed, 7 insertions, 3 deletions
diff --git a/indexer/src/man.rs b/indexer/src/man.rs
index 9bcb2bf..024f652 100644
--- a/indexer/src/man.rs
+++ b/indexer/src/man.rs
@@ -152,8 +152,12 @@ fn codec_from_path(path: &str) -> Option<EncodingRef> {
(_, Some("euckr")) => return Some(all::WINDOWS_949),
- ("ja", Some("jis7")) |
- ("ja", Some("pck")) => return None, /* WAT? TODO: DO SOMETHING WITH THESE */
+ /* Not sure if PCK is just an alias for SJIS or if there's more of a difference, but it
+ * certainly looks like a SJIS-like encoding. */
+ ("ja", Some("pck")) => return Some(all::WINDOWS_31J),
+
+ /* This is apparently some variant of ISO-2022-JP */
+ ("ja", Some("jis7")) => return Some(all::ISO_2022_JP),
(_, Some(x)) => match encoding_from_whatwg_label(x) {
Some(x) => return Some(x),
diff --git a/indexer/src/open.rs b/indexer/src/open.rs
index 6919fc4..ef7e8b2 100644
--- a/indexer/src/open.rs
+++ b/indexer/src/open.rs
@@ -7,7 +7,7 @@ use hyper;
const CACHE_PATH: &'static str = "/var/tmp/manned-indexer";
-const CACHE_TIME: u64 = 24*3600;
+const CACHE_TIME: u64 = 23*3600;
pub struct Path<'a> {