Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Difference From fsblobstore-0.0.4 To fsblobstore-0.0.5
2024-09-21
| ||
12:13 | Release maintenance. Leaf check-in: 87d76f5856 user: jan tags: fsblobstore-0.0.5, trunk | |
11:50 | Fix doc typo. check-in: b5b37540a0 user: jan tags: trunk | |
11:00 | Update dependencies. check-in: f346678cce user: jan tags: trunk | |
2024-01-30
| ||
16:39 | Happy Clippy. check-in: c017f1d615 user: jan tags: fsblobstore-0.0.4, trunk | |
16:34 | Happy clippy. check-in: d97283fae2 user: jan tags: trunk | |
Changes to Cargo.toml.
1 2 | [package] name = "fsblobstore" | | | > > | | | > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | [package] name = "fsblobstore" version = "0.0.5" edition = "2021" license = "0BSD" categories = [ "filesystem" ] keywords = [ "blob", "datastore" ] repository = "https://repos.qrnch.tech/pub/fsblobstore" description = "A file-system backed blob storage abstraction." rust-version = "1.63" exclude = [ ".fossil-settings", ".efiles", ".fslckout", "examples", "datastore", "www", "bacon.toml", "rustfmt.toml" ] [features] enumerate = ["dep:recstrm", "dep:walkdir"] get-fname = [] mkbasedir = [] [dependencies] hex = { version = "0.4.3" } idbag = { version = "0.2.0" } recstrm = { version = "0.0.1", optional = true } sha2 = { version = "0.10.8" } tmpfile = { version = "0.0.3" } walkdir = { version = "2.5.0", optional = true } [dev-dependencies] rand = { version = "0.8.5" } [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"] [lints.clippy] all = { level = "deny", priority = -1 } pedantic = { level = "warn", priority = -1 } nursery = { level = "warn", priority = -1 } cargo = { level = "warn", priority = -1 } |
Added bacon.toml.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | # This is a configuration file for the bacon tool # # Bacon repository: https://github.com/Canop/bacon # Complete help on configuration: https://dystroy.org/bacon/config/ # You can also check bacon's own bacon.toml file # as an example: https://github.com/Canop/bacon/blob/main/bacon.toml default_job = "clippy-all" [jobs.check] command = ["cargo", "check", "--color", "always"] need_stdout = false [jobs.check-all] command = ["cargo", "check", "--all-targets", "--color", "always"] need_stdout = false # Run clippy on the default target [jobs.clippy] command = [ "cargo", "clippy", "--all-features", "--color", "always", ] need_stdout = false # Run clippy on all targets # To disable some lints, you may change the job this way: # [jobs.clippy-all] # command = [ # "cargo", "clippy", # "--all-targets", # "--color", "always", # "--", # "-A", "clippy::bool_to_int_with_if", # "-A", "clippy::collapsible_if", # "-A", "clippy::derive_partial_eq_without_eq", # ] # need_stdout = false [jobs.clippy-all] command = [ "cargo", "clippy", "--all-features", "--all-targets", "--color", "always", ] need_stdout = false # This job lets you run # - all tests: bacon test # - a specific test: bacon test -- config::test_default_files # - the tests of a package: bacon test -- -- -p config [jobs.test] command = [ "cargo", "test", "--color", "always", "--", "--color", "always", # see https://github.com/Canop/bacon/issues/124 ] need_stdout = true [jobs.doc] command = ["cargo", "doc", "--color", 
"always", "--no-deps"] need_stdout = false # If the doc compiles, then it opens in your browser and bacon switches # to the previous job [jobs.doc-open] command = ["cargo", "doc", "--color", "always", "--no-deps", "--open"] need_stdout = false on_success = "back" # so that we don't open the browser at each change # You can run your application and have the result displayed in bacon, # *if* it makes sense for this crate. # Don't forget the `--color always` part or the errors won't be # properly parsed. # If your program never stops (eg a server), you may set `background` # to false to have the cargo run output immediately displayed instead # of waiting for program's end. [jobs.run] command = [ "cargo", "run", "--color", "always", # put launch parameters for your program behind a `--` separator ] need_stdout = true allow_warnings = true background = true # This parameterized job runs the example of your choice, as soon # as the code compiles. # Call it as # bacon ex -- my-example [jobs.ex] command = ["cargo", "run", "--color", "always", "--example"] need_stdout = true allow_warnings = true # You may define here keybindings that would be specific to # a project, for example a shortcut to launch a specific job. # Shortcuts to internal functions (scrolling, toggling, etc.) # should go in your personal global prefs.toml file instead. [keybindings] # alt-m = "job:my-job" c = "job:clippy-all" # comment this to have 'c' run clippy on only the default target |
Changes to examples/rmall.rs.
1 2 3 4 5 6 | #[cfg(feature = "enumerate")] mod inner { use std::env; use fsblobstore::FsBlobStore; | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | #[cfg(feature = "enumerate")] mod inner { use std::env; use fsblobstore::FsBlobStore; pub fn main() { // // Set up datastore base directory // let curdir = env::current_dir().unwrap(); let datastoredir = curdir.join("datastore"); #[cfg(not(feature = "mkbasedir"))] if !datastoredir.exists() { std::fs::create_dir_all(&datastoredir).unwrap(); } let bs = FsBlobStore::new(datastoredir).unwrap(); // // Enumerate all keys in content store // let (rx, jh) = bs.enumerate(); let mut keys = Vec::new(); while let Some(ch) = rx.recv().unwrap() { println!("{ch}"); keys.push(ch); } jh.join().unwrap(); for key in keys { |
︙ | ︙ |
Changes to examples/simple.rs.
︙ | ︙ | |||
36 37 38 39 40 41 42 | #[cfg(feature = "enumerate")] mod enumerate { use super::FsBlobStore; use fsblobstore::ContentHash; use sha2::{Digest, Sha256}; | | | | 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | #[cfg(feature = "enumerate")] mod enumerate { use super::FsBlobStore; use fsblobstore::ContentHash; use sha2::{Digest, Sha256}; pub fn enum_hashes(bs: &FsBlobStore) { let (rx, jh) = bs.enumerate(); while let Some(ch) = rx.recv().unwrap() { println!("{ch}"); let reader = bs.reader(&ch).unwrap(); let reader = Box::new(reader); read_and_verify(reader, &ch); } |
︙ | ︙ |
Changes to src/ch.rs.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | use std::{ fmt, hash::{Hash, Hasher}, ops::Deref, str::FromStr }; /// A hash of a content blob. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] pub struct ContentHash(Vec<u8>); impl ContentHash { pub fn into_inner(self) -> Vec<u8> { self.0 } } impl Hash for ContentHash { fn hash<H: Hasher>(&self, state: &mut H) { self.0.hash(state); } } impl From<Vec<u8>> for ContentHash { fn from(vec: Vec<u8>) -> Self { assert_eq!(vec.len(), 32); | > | | | < < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | use std::{ fmt, hash::{Hash, Hasher}, ops::Deref, str::FromStr }; /// A hash of a content blob. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] pub struct ContentHash(Vec<u8>); impl ContentHash { #[must_use] pub fn into_inner(self) -> Vec<u8> { self.0 } } impl Hash for ContentHash { fn hash<H: Hasher>(&self, state: &mut H) { self.0.hash(state); } } impl From<Vec<u8>> for ContentHash { fn from(vec: Vec<u8>) -> Self { assert_eq!(vec.len(), 32); Self(vec) } } impl Deref for ContentHash { type Target = [u8]; fn deref(&self) -> &Self::Target { &self.0 } } impl fmt::Display for ContentHash { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let hexhash = hex::encode(&self.0); write!(f, "{hexhash}") } } impl FromStr for ContentHash { type Err = (); fn from_str(s: &str) -> Result<Self, Self::Err> { hex::decode(s).map_or(Err(()), |buf| Ok(Self::from(buf))) } } // vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 : |
Changes to src/err.rs.
1 2 3 4 5 | use std::{fmt, io}; #[derive(Debug)] pub enum Error { BadFormat(String), | | > | | | < | < < | < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | use std::{fmt, io}; #[derive(Debug)] pub enum Error { BadFormat(String), IO(io::Error) } impl Error { #[allow(clippy::needless_pass_by_value)] pub fn bad_format(s: impl ToString) -> Self { Self::BadFormat(s.to_string()) } } impl std::error::Error for Error {} impl From<io::Error> for Error { fn from(err: io::Error) -> Self { Self::IO(err) } } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::BadFormat(s) => write!(f, "Bad format error; {s}"), Self::IO(e) => write!(f, "I/O error; {e}") } } } // vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 : |
Changes to src/lib.rs.
︙ | ︙ | |||
24 25 26 27 28 29 30 | #[cfg(feature = "enumerate")] use { std::{path::Component, thread}, walkdir::WalkDir }; | | | > | | | | | | | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | #[cfg(feature = "enumerate")] use { std::{path::Component, thread}, walkdir::WalkDir }; use idbag::IdBagU32; use tmpfile::TmpProc; use sha2::{Digest, Sha256}; pub use ch::ContentHash; pub use tmpfile::{self, TmpFile}; pub use err::Error; /// Internal type used by the [`TmpFile`] to hash and move blobs into their /// final location. struct Hasher { inner: Sha256, _id: idbag::IdU32 } impl TmpProc for Hasher { type Output = ContentHash; type Error = Error; /// Called when a buffer is about to be written. fn update(&mut self, buf: &[u8]) { self.inner.update(buf); } fn finalize( &mut self, tmpfile: Option<&Path> ) -> Result<(Self::Output, Option<PathBuf>), Self::Error> { let result = self.inner.clone().finalize(); let hash = result.to_vec(); let fname = if let Some(tmpfile) = tmpfile { let Some(basedir) = tmpfile.parent() else { panic!("foo"); }; let hexhash = hex::encode(&hash); let (subdir1, rest) = hexhash.split_at(2); let (subdir2, fname) = rest.split_at(2); let dir = basedir.join(subdir1).join(subdir2); if !dir.exists() { std::fs::create_dir_all(&dir)?; } Some(dir.join(fname)) } else { None }; Ok((ContentHash::from(hash), fname)) } } /// An abstraction over a blob storage in a file system directory. pub struct FsBlobStore { basedir: PathBuf, minsize: Option<usize>, /// Used to allocate unique identifiers for naming temporary files. idbag: IdBagU32 } impl FsBlobStore { fn fsparts(hexhash: &str) -> (&str, &str, &str) { let (subdir1, rest) = hexhash.split_at(2); let (subdir2, fname) = rest.split_at(2); |
︙ | ︙ | |||
119 120 121 122 123 124 125 | impl FsBlobStore { /// Create a new file system-backed blob storage engine. /// /// The `basedir` is where the blobs and temporary files will be stored. The /// caller must ensure that either `basedir` is absolute, or that the path /// remains valid throughout the object's lifetime. /// | | > > > > > | > > > > > > > > > > > | > > > > > > > | | > > > > > | | | > > > | | | | 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 | impl FsBlobStore { /// Create a new file system-backed blob storage engine. /// /// The `basedir` is where the blobs and temporary files will be stored. The /// caller must ensure that either `basedir` is absolute, or that the path /// remains valid throughout the object's lifetime. /// /// If the basedir does not exist, it will automatically be created if the /// `mkbasedir` feature is enabled. /// /// # Errors /// If `mkbasedir` feature is enabled, [`Error::IO`] indicates that the base /// directory can not be created. pub fn new(basedir: impl AsRef<Path>) -> Result<Self, Error> { let basedir = basedir.as_ref(); #[cfg(feature = "mkbasedir")] if !basedir.exists() { fs::create_dir_all(basedir)?; } Ok(Self { basedir: basedir.to_path_buf(), minsize: None, idbag: IdBagU32::new() }) } /// This function serves the purpose as [`FsBlobStore::new()`], but will /// enable support for storing small files in memory, rather than be written /// to disk. 
/// /// # Notes /// If support for storing small files in memory is enabled, "files" that /// will fall into this category will not actually be stored in the file /// system, and thus will neither be enumerable or read. /// /// The calling application must maintain its own databasse for such cases. #[allow(clippy::missing_errors_doc)] pub fn with_minsize( basedir: impl AsRef<Path>, minsize: usize ) -> Result<Self, Error> { let basedir = basedir.as_ref(); #[cfg(feature = "mkbasedir")] if !basedir.exists() { fs::create_dir_all(basedir)?; } Ok(Self { basedir: basedir.to_path_buf(), minsize: Some(minsize), idbag: IdBagU32::new() }) } /// Check if content for a hash exists in store. /// /// # Errors /// [`Error::IO`] indicates that it was not possible to determine whether the /// file exists. pub fn have(&self, hash: &[u8]) -> Result<bool, std::io::Error> { let fname = self.abspathname(hash); fname.try_exists() } /// Get a reader for a blob. /// /// # Errors /// [`Error::IO`] means the file could not be opened. pub fn reader( &self, hash: &[u8] ) -> Result<impl std::io::Read, std::io::Error> { let fname = self.abspathname(hash); fs::File::open(fname) } /// Return a [`TmpFile`] writer for writing to temporary file. /// /// If the caller wishes to keep the file it must call `TmpFile::persist()`. /// Dropping the `TmpFile`, without persisting it, will remove the temporary /// file. /// /// # Errors /// `std::io::Error` indicates that the temporary file could not be created. pub fn writer(&self) -> Result<TmpFile<ContentHash, Error>, std::io::Error> { let id = self.idbag.alloc(); let tmpfname = format!("tmp-{:08x}", id.get()); let tp = Hasher { inner: Sha256::new(), _id: id }; let tmpfname = self.basedir.join(tmpfname); if let Some(minsize) = self.minsize { TmpFile::with_minsize(tmpfname, Box::new(tp), minsize) } else { TmpFile::new(tmpfname, Box::new(tp)) } } /// Remove a blob, by its hash, from the blob store. 
/// /// # Errors /// `std::io::Error` indicates the file could not be removed. /// /// # Panics /// If the `hash` is not 32 bytes long this method will panic. pub fn rm(&self, hash: &[u8]) -> Result<(), std::io::Error> { let fname = self.abspathname(hash); fs::remove_file(&fname)?; let Some(subdir) = fname.parent() else { panic!("Unexpectedly unable to get parent directory."); }; let Ok(()) = fs::remove_dir(subdir) else { // Assume there are other files in this directory return Ok(()); }; let Some(subdir) = subdir.parent() else { panic!("Unexpectedly unable to get parent directory."); }; let Ok(()) = fs::remove_dir(subdir) else { // Assume there are other directories in this directory return Ok(()); }; Ok(()) } |
︙ | ︙ | |||
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 | /// limited to infrequent integrity checks. /// /// This method will launch a background thread which lives as long as it /// performs its work. It is inadvisable to allow end users to trigger this /// method to be run. #[cfg(feature = "enumerate")] #[cfg_attr(docsrs, doc(cfg(feature = "enumerate")))] pub fn enumerate( &self ) -> (recstrm::Receiver<ContentHash, ()>, thread::JoinHandle<()>) { let (tx, rx) = recstrm::channel::<ContentHash, ()>(32, None); let basedir = self.basedir.clone(); let jh = thread::spawn(move || { // Send hashes in batches let mut batch = Vec::with_capacity(16); | > > | > | 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 | /// limited to infrequent integrity checks. /// /// This method will launch a background thread which lives as long as it /// performs its work. It is inadvisable to allow end users to trigger this /// method to be run. 
#[cfg(feature = "enumerate")] #[cfg_attr(docsrs, doc(cfg(feature = "enumerate")))] #[allow(clippy::missing_panics_doc)] #[must_use] pub fn enumerate( &self ) -> (recstrm::Receiver<ContentHash, ()>, thread::JoinHandle<()>) { let (tx, rx) = recstrm::channel::<ContentHash, ()>(32, None); let basedir = self.basedir.clone(); let jh = thread::spawn(move || { // Send hashes in batches let mut batch = Vec::with_capacity(16); for entry in WalkDir::new(&basedir).into_iter().filter_map(Result::ok) { // Only care about entries of depth 3 (<subdir1>/<subdir2>/<file>) if entry.depth() != 3 { continue; } // Only care about regular files if !entry.file_type().is_file() { continue; } // Strip base directory from path let pth = entry.path(); // unwrap() should be okay, because path was constructed from basedir let pth = pth.strip_prefix(&basedir).unwrap(); // Construct a string from path components // Ignore any paths that have components that are not utf-8, and // ignore components that aren't "normal". let mut p = String::with_capacity(64); for c in pth.components() { |
︙ | ︙ | |||
293 294 295 296 297 298 299 300 301 302 303 304 | } // unwrap() is okay, since the it should have been sufficiently // validated above let hash = hex::decode(p).unwrap(); batch.push(ContentHash::from(hash)); if batch.len() >= 16 && tx.send_batch(batch.drain(..)).is_err() { break; } } if !batch.is_empty() { | > | > > > > | 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 | } // unwrap() is okay, since the it should have been sufficiently // validated above let hash = hex::decode(p).unwrap(); batch.push(ContentHash::from(hash)); #[allow(clippy::iter_with_drain)] if batch.len() >= 16 && tx.send_batch(batch.drain(..)).is_err() { break; } } if !batch.is_empty() { let _ = tx.send_batch(batch.into_iter()); } }); (rx, jh) } /// Get complete filename of an existing blob. /// /// Returns `Ok(PathBuf)` containing the path to the content, if it exists. /// /// # Caveat /// The use of this method is strongly discouraged. Use /// `FsBlobStore::have()` to check if a blob exists in the datastore, /// `FsBlobStore::reader()` to read a blob, and `FsBlobStore::rm()` to remove /// a blob. /// /// # Errors /// `std::io::Error` indicates the file doesn't exists or its metadata could /// not be read. #[cfg(feature = "get-fname")] #[cfg_attr(docsrs, doc(cfg(feature = "get-fname")))] pub fn get_fname(&self, hash: &[u8]) -> Result<PathBuf, std::io::Error> { let fname = self.abspathname(hash); fs::metadata(&fname)?; Ok(fname) } } // vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 : |
Changes to www/changelog.md.
1 2 3 4 | # Change Log ## [Unreleased] | > > | > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | # Change Log ⚠️ indicates a breaking change. ## [Unreleased] [Details](/vdiff?from=fsblobstore-0.0.5&to=trunk) ### Added ### Changed ### Removed --- ## [0.0.5] [Details](/vdiff?from=fsblobstore-0.0.4&to=fsblobstore-0.0.5) ### Changed - Update `idbag` to `0.2.0`. - ⚠️ Update `tmpfile` to `0.0.3`. --- ## [0.0.4] - 2024-01-30 [Details](/vdiff?from=fsblobstore-0.0.3&to=fsblobstore-0.0.4) ### Changed |
︙ | ︙ |
Changes to www/index.md.
︙ | ︙ | |||
13 14 15 16 17 18 19 | ## Feature labels in documentation The crate's documentation uses automatically generated feature labels, which currently requires nightly features. To build the documentation locally use: ``` | | < | 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | ## Feature labels in documentation The crate's documentation uses automatically generated feature labels, which currently requires nightly features. To build the documentation locally use: ``` $ RUSTFLAGS="--cfg docsrs" RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features ``` ## Change log The details of changes can always be found in the timeline, but for a high-level view of changes between released versions there's a manually |
︙ | ︙ |