summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorhel <git@yorhel.nl>2020-05-25 09:06:34 +0200
committerYorhel <git@yorhel.nl>2020-05-25 09:06:34 +0200
commite955c5da130cb3af25098186722a7ee306beab05 (patch)
tree552da277cc5ef94cb1fbb56f159d47cdeb11c88e
parent6a4a2ea6c5cb3ac90d53645842729604dd4453cb (diff)
Add custom blake3 Hash type + a few utility functions from chifs-share
-rw-r--r--Cargo.toml9
-rw-r--r--src/blake3.rs178
-rw-r--r--src/httpserv.rs11
-rw-r--r--src/lib.rs5
-rw-r--r--src/util.rs15
5 files changed, 208 insertions, 10 deletions
diff --git a/Cargo.toml b/Cargo.toml
index a17c7de..a31c154 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,10 @@ license = "MIT"
edition = "2018"
[dependencies]
+arrayvec = { version = "0.5", default-features = false, features = ["array-sizes-33-128"] }
+bendy = "0.3"
+blake3 = "0.3"
+bytes = { version = "0.5", optional = true }
chrono = { version = "0.4.6" }
clap = { version = "2.32.0", default-features = false }
colored = "1.7.0"
@@ -18,7 +22,12 @@ globset = "0.4.2"
httparse = "1.3.3"
log = "0.4.0"
multisock = "1.0.0"
+postgres-types = { version = "0.1", optional = true }
+sqlite = { version = "0.25", optional = true }
zstd = "0.4"
+[features]
+postgres = ["bytes", "postgres-types"]
+
[target."cfg(not(windows))".dependencies]
syslog = "4.0.1"
diff --git a/src/blake3.rs b/src/blake3.rs
new file mode 100644
index 0000000..d3f202b
--- /dev/null
+++ b/src/blake3.rs
@@ -0,0 +1,178 @@
+use std::convert::TryInto;
+use blake3::OUT_LEN;
+use crate::util::{hexdigit,ceil_div};
+
/// Error returned when constructing a Hash from bytes or a hex string
/// that doesn't have the right length (or contains non-hex characters).
#[derive(PartialEq,Eq,Debug,Clone,Copy)]
pub struct HashParseError(());

impl std::fmt::Display for HashParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // f.pad() is exactly what str's Display impl does, so formatter
        // flags (width/alignment) behave the same as before.
        f.pad("Invalid length for Hash value")
    }
}
impl std::error::Error for HashParseError {}
+
+
+
/// Similar to blake3::Hash, except without the constant time Eq (unnecessary for file hashes,
/// there's nothing secret about them) and with a bunch of extra convenience methods and
/// conversions.
///
/// The inner byte array is private; read access goes through as_bytes() / as_ref().
#[derive(PartialEq,Eq,Hash,PartialOrd,Ord,Clone,Copy)]
pub struct Hash([u8; OUT_LEN]);
+
+
+impl From<[u8; OUT_LEN]> for Hash {
+ #[inline]
+ fn from(bytes: [u8;OUT_LEN]) -> Hash { Hash(bytes) }
+}
+
+impl From<blake3::Hash> for Hash {
+ #[inline]
+ fn from(hash: blake3::Hash) -> Hash { Hash(hash.into()) }
+}
+
+impl From<Hash> for blake3::Hash {
+ #[inline]
+ fn from(hash: Hash) -> blake3::Hash { hash.0.into() }
+}
+
+impl std::convert::TryFrom<&[u8]> for Hash {
+ type Error = HashParseError;
+ fn try_from(v: &[u8]) -> Result<Hash,Self::Error> {
+ if v.len() != OUT_LEN {
+ return Err(HashParseError(()));
+ }
+ let mut h = Hash([0u8;OUT_LEN]);
+ h.0.copy_from_slice(v);
+ Ok(h)
+ }
+}
+
+impl std::str::FromStr for Hash {
+ type Err = HashParseError;
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ if s.len() != OUT_LEN*2 {
+ return Err(HashParseError(()));
+ }
+ let mut h = Hash([0u8;OUT_LEN]);
+ for (i,c) in s.as_bytes().chunks_exact(2).enumerate() {
+ match (hexdigit(c[0]), hexdigit(c[1])) {
+ (Some(a), Some(b)) => h.0[i] = a << 4 + b,
+ _ => return Err(HashParseError(()))
+ }
+ }
+ Ok(h)
+ }
+}
+
// Borrow the hash as a plain byte slice.
impl AsRef<[u8]> for Hash {
    #[inline]
    fn as_ref(&self) -> &[u8] { &self.0[..] }
}

// Debug prints as `Hash(<hex>)` instead of the raw derived byte-array form.
impl std::fmt::Debug for Hash {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "Hash({})", self.to_hex())
    }
}

// Display prints the bare lowercase hex string (see to_hex()).
impl std::fmt::Display for Hash {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.to_hex().fmt(f)
    }
}
+
#[cfg(feature="sqlite")]
impl sqlite::Bindable for &Hash {
    /// Bind the hash to a statement parameter as a raw BLOB.
    #[inline]
    fn bind(self, st: &mut sqlite::Statement, idx: usize) -> sqlite::Result<()> {
        st.bind(idx, &self.0[..])
    }
}

#[cfg(feature="sqlite")]
impl sqlite::Readable for Hash {
    /// Read a BLOB column into a Hash; fails with error code 20
    /// (SQLITE_MISMATCH) when the stored value has the wrong length.
    fn read(st: &sqlite::Statement, idx: usize) -> sqlite::Result<Hash> {
        // This memory allocation can be avoided by calling the sqlite3 C API directly.
        st.read::<Vec::<u8>>(idx).and_then(
            |v| (&v[..]).try_into().map_err(|_| sqlite::Error { code: Some(20), message: Some(format!("Attempt to read hash value of {} bytes", v.len())) })
        )
    }
}
+
#[cfg(feature="postgres")]
impl postgres_types::ToSql for Hash {
    // Serialize by delegating to the existing &[u8] -> BYTEA impl.
    #[inline]
    fn to_sql(&self, t: &postgres_types::Type, w: &mut bytes::BytesMut) -> Result<postgres_types::IsNull, Box<dyn std::error::Error + Sync + Send>> { (&self.0[..]).to_sql(t, w) }
    // Accept BYTEA columns as well as domain types defined over BYTEA.
    #[inline]
    fn accepts(ty: &postgres_types::Type) -> bool { ty == &postgres_types::Type::BYTEA || ty.kind() == &postgres_types::Kind::Domain(postgres_types::Type::BYTEA) }
    // Macro from postgres-types (brought in via #[macro_use] in lib.rs).
    to_sql_checked!();
}

#[cfg(feature="postgres")]
impl<'a> postgres_types::FromSql<'a> for Hash {
    // Deserialize a BYTEA value; a wrong-length value surfaces the
    // HashParseError from the TryFrom<&[u8]> impl as a boxed error.
    #[inline]
    fn from_sql(t: &postgres_types::Type, raw: &'a [u8]) -> Result<Hash, Box<dyn std::error::Error + Sync + Send>> {
        <&[u8] as postgres_types::FromSql>::from_sql(t, raw).and_then(
            |v| v.try_into().map_err(|e: HashParseError| e.into())
        )
    }
    #[inline]
    fn accepts(ty: &postgres_types::Type) -> bool { ty == &postgres_types::Type::BYTEA || ty.kind() == &postgres_types::Kind::Domain(postgres_types::Type::BYTEA) }
}
+
// Bencode encoding: a hash is emitted as a raw byte string.
impl bendy::encoding::ToBencode for Hash {
    const MAX_DEPTH: usize = 1;
    #[inline]
    fn encode(&self, encoder: bendy::encoding::SingleItemEncoder) -> Result<(), bendy::encoding::Error> {
        encoder.emit_bytes(&self.0[..])
    }
}

// Bencode decoding: expects a byte string of exactly the hash length;
// anything else is reported as an unexpected token.
impl bendy::decoding::FromBencode for Hash {
    const EXPECTED_RECURSION_DEPTH: usize = 0;
    fn decode_bencode_object(object: bendy::decoding::Object) -> Result<Hash, bendy::decoding::Error> {
        object.try_into_bytes().and_then(
            |v| v.try_into().map_err(|_| bendy::decoding::Error::unexpected_token("hash value", "byte string"))
        )
    }
}
+
+
impl Hash {
    /// Hex-encode the hash into a stack-allocated string (lowercase, two
    /// characters per byte); avoids the heap allocation of format!().
    pub fn to_hex(&self) -> arrayvec::ArrayString<[u8; OUT_LEN*2]> {
        let mut s = arrayvec::ArrayString::new();
        let table = b"0123456789abcdef";
        for &b in self.0.iter() {
            s.push(table[(b >> 4) as usize] as char);
            s.push(table[(b & 0xf) as usize] as char);
        }
        s
    }

    /// Borrow the raw hash bytes as a fixed-size array.
    #[inline]
    pub fn as_bytes(&self) -> &[u8;OUT_LEN] { &self.0 }

    /// Hash a single chunk of input at the given chunk index. `is_root`
    /// marks the chunk as the root of the tree (i.e. the whole input fits
    /// in one chunk).
    pub fn hash_chunk(chunk_num: u64, is_root: bool, buf: &[u8]) -> Hash {
        blake3::guts::ChunkState::new(chunk_num).update(buf).finalize(is_root).into()
    }

    /// Hash a complete in-memory buffer in one go.
    #[inline]
    pub fn hash_buf(buf: &[u8]) -> Hash {
        blake3::hash(buf).into()
    }

    /// Merge a list of chunks - all at the same level of the tree - into a single parent node
    /// chaining value.
    ///
    /// `next` is called once per chunk, left to right; its bool argument is
    /// only true when the tree consists of a single chunk and is_root is set.
    /// NOTE(review): assumes num_chunks >= 1 — with 0 the subtraction below
    /// underflows, since 0u64.next_power_of_two() == 1. Confirm callers
    /// never pass 0.
    pub fn merge_chunks<F,E>(num_chunks: u64, is_root: bool, next: &mut F) -> Result<Hash,E> where F: FnMut(bool) -> Result<Hash,E> {
        // Left subtree takes ceil(n/2) rounded up to a power of two, which
        // yields the largest power-of-two split that still leaves chunks on
        // the right whenever n > 1.
        let left_chunks = ceil_div(num_chunks, 2).next_power_of_two();
        let right_chunks = num_chunks - left_chunks;
        if right_chunks == 0 {
            // Single chunk: no parent node to compute, just consume the leaf.
            next(is_root)
        } else {
            let left = Self::merge_chunks(left_chunks, false, next)?.into();
            let right = Self::merge_chunks(right_chunks, false, next)?.into();
            Ok(blake3::guts::parent_cv(&left, &right, is_root).into())
        }
    }
}
diff --git a/src/httpserv.rs b/src/httpserv.rs
index bd3c327..3c779cd 100644
--- a/src/httpserv.rs
+++ b/src/httpserv.rs
@@ -11,7 +11,7 @@ use chrono::prelude::*;
use multisock::{Listener,Stream,SocketAddr};
use httparse;
-use crate::util::{VecFill,percent_escape};
+use crate::util::{VecFill,percent_escape,hexdigit};
/* This is a really simple multithreaded web server. It spawns a new thread for each connection and
* supports limiting the number of simultaneous connections and the number of active ("working", as
@@ -76,15 +76,6 @@ impl<'a> Request<'a> {
return None;
}
- fn hexdigit(b: u8) -> Option<u8> {
- match b {
- b'0'..=b'9' => Some(b - b'0'),
- b'A'..=b'F' => Some(b - b'A' + 10),
- b'a'..=b'f' => Some(b - b'a' + 10),
- _ => None
- }
- }
-
let mut out = Vec::with_capacity(buf.len());
let mut i = 0;
let ubuf = buf.as_bytes();
diff --git a/src/lib.rs b/src/lib.rs
index 012a772..bc23692 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,8 +1,13 @@
#![allow(clippy::or_fun_call, clippy::identity_op, clippy::redundant_field_names)]
+#[cfg(feature="postgres")] #[macro_use] extern crate postgres_types;
#[macro_use] extern crate log;
pub mod config;
pub mod httpserv;
pub mod cli;
pub mod util;
+
+mod blake3;
+
+pub use crate::blake3::Hash;
diff --git a/src/util.rs b/src/util.rs
index 7479fde..3519830 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -1,5 +1,20 @@
use std::io::{self,Read};
/// Ceiling integer division: smallest x such that x*b >= a.
/// Panics on b == 0 unless a == 0 (same as plain integer division).
pub fn ceil_div(a: u64, b: u64) -> u64 {
    // Written to avoid the overflow in the naive `(a + b - 1) / b` form;
    // a == 0 is special-cased so it never reaches the division.
    match a {
        0 => 0,
        n => (n - 1) / b + 1,
    }
}
+
+
/// Convert a hex digit [0-9a-fA-F] into its value (0-15); None for
/// anything else.
pub fn hexdigit(b: u8) -> Option<u8> {
    // char::to_digit(16) accepts exactly the same set of characters:
    // 0-9, a-f and A-F. The value always fits in a u8.
    (b as char).to_digit(16).map(|d| d as u8)
}
+
+
/// Handy trait to make it easier to use a `Vec<u8>` as a network buffer. The usage pattern is
/// roughly as follows:
///