Index: Cargo.toml ================================================================== --- Cargo.toml +++ Cargo.toml @@ -1,21 +1,23 @@ [package] name = "fsblobstore" -version = "0.0.4" +version = "0.0.5" edition = "2021" license = "0BSD" categories = [ "filesystem" ] keywords = [ "blob", "datastore" ] repository = "https://repos.qrnch.tech/pub/fsblobstore" description = "A file-system backed blob storage abstraction." -rust-version = "1.56" +rust-version = "1.63" exclude = [ ".fossil-settings", ".efiles", ".fslckout", + "examples", "datastore", "www", + "bacon.toml", "rustfmt.toml" ] [features] enumerate = ["dep:recstrm", "dep:walkdir"] @@ -22,18 +24,24 @@ get-fname = [] mkbasedir = [] [dependencies] hex = { version = "0.4.3" } -idbag = { version = "0.1.2" } +idbag = { version = "0.2.0" } recstrm = { version = "0.0.1", optional = true } sha2 = { version = "0.10.8" } -tmpfile = { version = "0.0.2" } -walkdir = { version = "2.4.0", optional = true } +tmpfile = { version = "0.0.3" } +walkdir = { version = "2.5.0", optional = true } [dev-dependencies] rand = { version = "0.8.5" } [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"] +[lints.clippy] +all = { level = "deny", priority = -1 } +pedantic = { level = "warn", priority = -1 } +nursery = { level = "warn", priority = -1 } +cargo = { level = "warn", priority = -1 } + ADDED bacon.toml Index: bacon.toml ================================================================== --- /dev/null +++ bacon.toml @@ -0,0 +1,103 @@ +# This is a configuration file for the bacon tool +# +# Bacon repository: https://github.com/Canop/bacon +# Complete help on configuration: https://dystroy.org/bacon/config/ +# You can also check bacon's own bacon.toml file +# as an example: https://github.com/Canop/bacon/blob/main/bacon.toml + +default_job = "clippy-all" + +[jobs.check] +command = ["cargo", "check", "--color", "always"] +need_stdout = false + +[jobs.check-all] +command = 
["cargo", "check", "--all-targets", "--color", "always"] +need_stdout = false + +# Run clippy on the default target +[jobs.clippy] +command = [ + "cargo", "clippy", + "--all-features", + "--color", "always", +] +need_stdout = false + +# Run clippy on all targets +# To disable some lints, you may change the job this way: +# [jobs.clippy-all] +# command = [ +# "cargo", "clippy", +# "--all-targets", +# "--color", "always", +# "--", +# "-A", "clippy::bool_to_int_with_if", +# "-A", "clippy::collapsible_if", +# "-A", "clippy::derive_partial_eq_without_eq", +# ] +# need_stdout = false +[jobs.clippy-all] +command = [ + "cargo", "clippy", + "--all-features", + "--all-targets", + "--color", "always", +] +need_stdout = false + +# This job lets you run +# - all tests: bacon test +# - a specific test: bacon test -- config::test_default_files +# - the tests of a package: bacon test -- -- -p config +[jobs.test] +command = [ + "cargo", "test", "--color", "always", + "--", "--color", "always", # see https://github.com/Canop/bacon/issues/124 +] +need_stdout = true + +[jobs.doc] +command = ["cargo", "doc", "--color", "always", "--no-deps"] +need_stdout = false + +# If the doc compiles, then it opens in your browser and bacon switches +# to the previous job +[jobs.doc-open] +command = ["cargo", "doc", "--color", "always", "--no-deps", "--open"] +need_stdout = false +on_success = "back" # so that we don't open the browser at each change + +# You can run your application and have the result displayed in bacon, +# *if* it makes sense for this crate. +# Don't forget the `--color always` part or the errors won't be +# properly parsed. +# If your program never stops (eg a server), you may set `background` +# to false to have the cargo run output immediately displayed instead +# of waiting for program's end. 
+[jobs.run] +command = [ + "cargo", "run", + "--color", "always", + # put launch parameters for your program behind a `--` separator +] +need_stdout = true +allow_warnings = true +background = true + +# This parameterized job runs the example of your choice, as soon +# as the code compiles. +# Call it as +# bacon ex -- my-example +[jobs.ex] +command = ["cargo", "run", "--color", "always", "--example"] +need_stdout = true +allow_warnings = true + +# You may define here keybindings that would be specific to +# a project, for example a shortcut to launch a specific job. +# Shortcuts to internal functions (scrolling, toggling, etc.) +# should go in your personal global prefs.toml file instead. +[keybindings] +# alt-m = "job:my-job" +c = "job:clippy-all" # comment this to have 'c' run clippy on only the default target Index: examples/rmall.rs ================================================================== --- examples/rmall.rs +++ examples/rmall.rs @@ -2,11 +2,11 @@ mod inner { use std::env; use fsblobstore::FsBlobStore; - pub(super) fn main() { + pub fn main() { // // Set up datastore base directory // let curdir = env::current_dir().unwrap(); let datastoredir = curdir.join("datastore"); @@ -22,11 +22,11 @@ // let (rx, jh) = bs.enumerate(); let mut keys = Vec::new(); while let Some(ch) = rx.recv().unwrap() { - println!("{}", ch); + println!("{ch}"); keys.push(ch); } jh.join().unwrap(); Index: examples/simple.rs ================================================================== --- examples/simple.rs +++ examples/simple.rs @@ -38,14 +38,14 @@ mod enumerate { use super::FsBlobStore; use fsblobstore::ContentHash; use sha2::{Digest, Sha256}; - pub(super) fn enum_hashes(bs: &FsBlobStore) { + pub fn enum_hashes(bs: &FsBlobStore) { let (rx, jh) = bs.enumerate(); while let Some(ch) = rx.recv().unwrap() { - println!("{}", ch); + println!("{ch}"); let reader = bs.reader(&ch).unwrap(); let reader = Box::new(reader); read_and_verify(reader, &ch); Index: src/ch.rs 
================================================================== --- src/ch.rs +++ src/ch.rs @@ -9,10 +9,11 @@ #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] pub struct ContentHash(Vec); impl ContentHash { + #[must_use] pub fn into_inner(self) -> Vec { self.0 } } @@ -23,11 +24,11 @@ } impl From> for ContentHash { fn from(vec: Vec) -> Self { assert_eq!(vec.len(), 32); - ContentHash(vec) + Self(vec) } } impl Deref for ContentHash { type Target = [u8]; @@ -37,21 +38,18 @@ } impl fmt::Display for ContentHash { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let hexhash = hex::encode(&self.0); - write!(f, "{}", hexhash) + write!(f, "{hexhash}") } } impl FromStr for ContentHash { type Err = (); fn from_str(s: &str) -> Result { - match hex::decode(s) { - Ok(buf) => Ok(ContentHash::from(buf)), - Err(_) => Err(()) - } + hex::decode(s).map_or(Err(()), |buf| Ok(Self::from(buf))) } } // vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 : Index: src/err.rs ================================================================== --- src/err.rs +++ src/err.rs @@ -1,36 +1,33 @@ use std::{fmt, io}; #[derive(Debug)] pub enum Error { BadFormat(String), - IO(String) + IO(io::Error) } impl Error { - pub fn bad_format(s: S) -> Self { - Error::BadFormat(s.to_string()) + #[allow(clippy::needless_pass_by_value)] + pub fn bad_format(s: impl ToString) -> Self { + Self::BadFormat(s.to_string()) } } impl std::error::Error for Error {} impl From for Error { fn from(err: io::Error) -> Self { - Error::IO(err.to_string()) + Self::IO(err) } } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Error::BadFormat(s) => { - write!(f, "Bad format error; {}", s) - } - Error::IO(s) => { - write!(f, "I/O error; {}", s) - } + Self::BadFormat(s) => write!(f, "Bad format error; {s}"), + Self::IO(e) => write!(f, "I/O error; {e}") } } } // vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 : Index: src/lib.rs 
================================================================== --- src/lib.rs +++ src/lib.rs @@ -26,11 +26,11 @@ use { std::{path::Component, thread}, walkdir::WalkDir }; -use idbag::IdBag; +use idbag::IdBagU32; use tmpfile::TmpProc; use sha2::{Digest, Sha256}; @@ -42,25 +42,26 @@ /// Internal type used by the [`TmpFile`] to hash and move blobs into their /// final location. struct Hasher { inner: Sha256, - _id: idbag::Id + _id: idbag::IdU32 } impl TmpProc for Hasher { type Output = ContentHash; + type Error = Error; /// Called when a buffer is about to be written. fn update(&mut self, buf: &[u8]) { self.inner.update(buf); } fn finalize( &mut self, tmpfile: Option<&Path> - ) -> Result<(Self::Output, Option), std::io::Error> { + ) -> Result<(Self::Output, Option), Self::Error> { let result = self.inner.clone().finalize(); let hash = result.to_vec(); let fname = if let Some(tmpfile) = tmpfile { let Some(basedir) = tmpfile.parent() else { @@ -68,15 +69,15 @@ }; let hexhash = hex::encode(&hash); let (subdir1, rest) = hexhash.split_at(2); let (subdir2, fname) = rest.split_at(2); - let subdirs = basedir.join(subdir1).join(subdir2); - if !subdirs.exists() { - std::fs::create_dir_all(&subdirs)?; + let dir = basedir.join(subdir1).join(subdir2); + if !dir.exists() { + std::fs::create_dir_all(&dir)?; } - Some(subdirs.join(fname)) + Some(dir.join(fname)) } else { None }; Ok((ContentHash::from(hash), fname)) } @@ -88,11 +89,11 @@ basedir: PathBuf, minsize: Option, /// Used to allocate unique identifiers for naming temporary files. - idbag: IdBag + idbag: IdBagU32 } impl FsBlobStore { fn fsparts(hexhash: &str) -> (&str, &str, &str) { let (subdir1, rest) = hexhash.split_at(2); @@ -121,11 +122,16 @@ /// /// The `basedir` is where the blobs and temporary files will be stored. The /// caller must ensure that either `basedir` is absolute, or that the path /// remains valid throughout the object's lifetime. 
/// - /// If the basedir does not exist, the + /// If the basedir does not exist, it will automatically be created if the + /// `mkbasedir` feature is enabled. + /// + /// # Errors + /// If `mkbasedir` feature is enabled, [`Error::IO`] indicates that the base + /// directory cannot be created. pub fn new(basedir: impl AsRef) -> Result { let basedir = basedir.as_ref(); #[cfg(feature = "mkbasedir")] if !basedir.exists() { @@ -133,14 +139,25 @@ } Ok(Self { basedir: basedir.to_path_buf(), minsize: None, - idbag: IdBag::new() + idbag: IdBagU32::new() }) } + /// This function serves the same purpose as [`FsBlobStore::new()`], but will + /// enable support for storing small files in memory, rather than being written + /// to disk. + /// + /// # Notes + /// If support for storing small files in memory is enabled, "files" that + /// will fall into this category will not actually be stored in the file + /// system, and thus will be neither enumerable nor readable. + /// + /// The calling application must maintain its own database for such cases. + #[allow(clippy::missing_errors_doc)] pub fn with_minsize( basedir: impl AsRef, minsize: usize ) -> Result { let basedir = basedir.as_ref(); @@ -151,36 +168,48 @@ } Ok(Self { basedir: basedir.to_path_buf(), minsize: Some(minsize), - idbag: IdBag::new() + idbag: IdBagU32::new() }) } /// Check if content for a hash exists in store. + /// + /// # Errors + /// [`Error::IO`] indicates that it was not possible to determine whether the + /// file exists. pub fn have(&self, hash: &[u8]) -> Result { let fname = self.abspathname(hash); fname.try_exists() } /// Get a reader for a blob. + /// + /// # Errors + /// [`Error::IO`] means the file could not be opened. pub fn reader( &self, hash: &[u8] ) -> Result { let fname = self.abspathname(hash); fs::File::open(fname) } - /// Return a writer for writing to temporary file. + /// Return a [`TmpFile`] writer for writing to a temporary file. 
+ /// + /// If the caller wishes to keep the file it must call `TmpFile::persist()`. + /// Dropping the `TmpFile`, without persisting it, will remove the temporary + /// file. /// - /// If the caller wishes to keep - pub fn writer(&self) -> Result, std::io::Error> { + /// # Errors + /// `std::io::Error` indicates that the temporary file could not be created. + pub fn writer(&self) -> Result, std::io::Error> { let id = self.idbag.alloc(); - let tmpfname = format!("tmp-{:08x}", id.val()); + let tmpfname = format!("tmp-{:08x}", id.get()); let tp = Hasher { inner: Sha256::new(), _id: id }; let tmpfname = self.basedir.join(tmpfname); @@ -189,31 +218,34 @@ } else { TmpFile::new(tmpfname, Box::new(tp)) } } - /// Remove a blob from the blob store. + /// Remove a blob, by its hash, from the blob store. + /// + /// # Errors + /// `std::io::Error` indicates the file could not be removed. /// - /// # Panic + /// # Panics /// If the `hash` is not 32 bytes long this method will panic. pub fn rm(&self, hash: &[u8]) -> Result<(), std::io::Error> { let fname = self.abspathname(hash); fs::remove_file(&fname)?; let Some(subdir) = fname.parent() else { panic!("Unexpectedly unable to get parent directory."); }; - let Ok(_) = fs::remove_dir(subdir) else { + let Ok(()) = fs::remove_dir(subdir) else { // Assume there are other files in this directory return Ok(()); }; let Some(subdir) = subdir.parent() else { panic!("Unexpectedly unable to get parent directory."); }; - let Ok(_) = fs::remove_dir(subdir) else { + let Ok(()) = fs::remove_dir(subdir) else { // Assume there are other directories in this directory return Ok(()); }; Ok(()) @@ -236,19 +268,21 @@ /// This method will launch a background thread which lives as long as it /// performs its work. It is inadvisable to allow end users to trigger this /// method to be run. 
#[cfg(feature = "enumerate")] #[cfg_attr(docsrs, doc(cfg(feature = "enumerate")))] + #[allow(clippy::missing_panics_doc)] + #[must_use] pub fn enumerate( &self ) -> (recstrm::Receiver, thread::JoinHandle<()>) { let (tx, rx) = recstrm::channel::(32, None); let basedir = self.basedir.clone(); let jh = thread::spawn(move || { // Send hashes in batches let mut batch = Vec::with_capacity(16); - for entry in WalkDir::new(&basedir).into_iter().filter_map(|e| e.ok()) { + for entry in WalkDir::new(&basedir).into_iter().filter_map(Result::ok) { // Only care about entries of depth 3 (//) if entry.depth() != 3 { continue; } @@ -257,10 +291,11 @@ continue; } // Strip base directory from path let pth = entry.path(); + // unwrap() should be okay, because path was constructed from basedir let pth = pth.strip_prefix(&basedir).unwrap(); // Construct a string from path components // Ignore any paths that have components that are not utf-8, and // ignore components that aren't "normal". @@ -295,16 +330,17 @@ // unwrap() is okay, since the it should have been sufficiently // validated above let hash = hex::decode(p).unwrap(); batch.push(ContentHash::from(hash)); + #[allow(clippy::iter_with_drain)] if batch.len() >= 16 && tx.send_batch(batch.drain(..)).is_err() { break; } } if !batch.is_empty() { - let _ = tx.send_batch(batch.drain(..)); + let _ = tx.send_batch(batch.into_iter()); } }); (rx, jh) } @@ -316,10 +352,14 @@ /// # Caveat /// The use of this method is strongly discouraged. Use /// `FsBlobStore::have()` to check if a blob exists in the datastore, /// `FsBlobStore::reader()` to read a blob, and `FsBlobStore::rm()` to remove /// a blob. + /// + /// # Errors + /// `std::io::Error` indicates the file doesn't exist or its metadata could + /// not be read. 
#[cfg(feature = "get-fname")] #[cfg_attr(docsrs, doc(cfg(feature = "get-fname")))] pub fn get_fname(&self, hash: &[u8]) -> Result { let fname = self.abspathname(hash); fs::metadata(&fname)?; Index: www/changelog.md ================================================================== --- www/changelog.md +++ www/changelog.md @@ -1,17 +1,30 @@ # Change Log +⚠️ indicates a breaking change. + ## [Unreleased] -[Details](/vdiff?from=fsblobstore-0.0.4&to=trunk) +[Details](/vdiff?from=fsblobstore-0.0.5&to=trunk) ### Added ### Changed ### Removed +--- + +## [0.0.5] + +[Details](/vdiff?from=fsblobstore-0.0.4&to=fsblobstore-0.0.5) + +### Changed + +- Update `idbag` to `0.2.0`. +- ⚠️ Update `tmpfile` to `0.0.3`. + --- ## [0.0.4] - 2024-01-30 [Details](/vdiff?from=fsblobstore-0.0.3&to=fsblobstore-0.0.4) Index: www/index.md ================================================================== --- www/index.md +++ www/index.md @@ -15,12 +15,11 @@ The crate's documentation uses automatically generated feature labels, which currently requires nightly featuers. To build the documentation locally use: ``` -$ RUSTFLAGS="--cfg docsrs" RUSTDOCFLAGS="--cfg docsrs" \ -cargo +nightly doc --all-features +$ RUSTFLAGS="--cfg docsrs" RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features ``` ## Change log