fsblobstore

Check-in Differences
Login

Check-in Differences

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Difference From fsblobstore-0.0.4 To fsblobstore-0.0.5

2024-09-21
12:13
Release maintenance. Leaf check-in: 87d76f5856 user: jan tags: fsblobstore-0.0.5, trunk
11:50
Fix doc typo. check-in: b5b37540a0 user: jan tags: trunk
11:00
Update dependencies. check-in: f346678cce user: jan tags: trunk
2024-01-30
16:39
Happy Clippy. check-in: c017f1d615 user: jan tags: fsblobstore-0.0.4, trunk
16:34
Happy clippy. check-in: d97283fae2 user: jan tags: trunk

Changes to Cargo.toml.

1
2
3
4
5
6
7
8
9
10
11
12
13
14

15
16

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39






[package]
name = "fsblobstore"
version = "0.0.4"
edition = "2021"
license = "0BSD"
categories = [ "filesystem" ]
keywords = [ "blob", "datastore" ]
repository = "https://repos.qrnch.tech/pub/fsblobstore"
description = "A file-system backed blob storage abstraction."
rust-version = "1.56"
exclude = [
  ".fossil-settings",
  ".efiles",
  ".fslckout",

  "datastore",
  "www",

  "rustfmt.toml"
]

[features]
enumerate = ["dep:recstrm", "dep:walkdir"]
get-fname = []
mkbasedir = []

[dependencies]
hex = { version = "0.4.3" }
idbag = { version = "0.1.2" }
recstrm = { version = "0.0.1", optional = true }
sha2 =  { version = "0.10.8" }
tmpfile = { version = "0.0.2" }
walkdir = { version = "2.4.0", optional = true }

[dev-dependencies]
rand = { version = "0.8.5" }

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"]









|






|




>


>










|


|
|








>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
[package]
name = "fsblobstore"
version = "0.0.5"
edition = "2021"
license = "0BSD"
categories = [ "filesystem" ]
keywords = [ "blob", "datastore" ]
repository = "https://repos.qrnch.tech/pub/fsblobstore"
description = "A file-system backed blob storage abstraction."
rust-version = "1.63"
exclude = [
  ".fossil-settings",
  ".efiles",
  ".fslckout",
  "examples",
  "datastore",
  "www",
  "bacon.toml",
  "rustfmt.toml"
]

[features]
enumerate = ["dep:recstrm", "dep:walkdir"]
get-fname = []
mkbasedir = []

[dependencies]
hex = { version = "0.4.3" }
idbag = { version = "0.2.0" }
recstrm = { version = "0.0.1", optional = true }
sha2 =  { version = "0.10.8" }
tmpfile = { version = "0.0.3" }
walkdir = { version = "2.5.0", optional = true }

[dev-dependencies]
rand = { version = "0.8.5" }

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"]

[lints.clippy]
all = { level = "deny", priority = -1 }
pedantic = { level = "warn", priority = -1 }
nursery = { level = "warn", priority = -1 }
cargo = { level = "warn", priority = -1 }

Added bacon.toml.















































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# This is a configuration file for the bacon tool
#
# Bacon repository: https://github.com/Canop/bacon
# Complete help on configuration: https://dystroy.org/bacon/config/
# You can also check bacon's own bacon.toml file
#  as an example: https://github.com/Canop/bacon/blob/main/bacon.toml

default_job = "clippy-all"

[jobs.check]
command = ["cargo", "check", "--color", "always"]
need_stdout = false

[jobs.check-all]
command = ["cargo", "check", "--all-targets", "--color", "always"]
need_stdout = false

# Run clippy on the default target
[jobs.clippy]
command = [
    "cargo", "clippy",
    "--all-features",
    "--color", "always",
]
need_stdout = false

# Run clippy on all targets
# To disable some lints, you may change the job this way:
#    [jobs.clippy-all]
#    command = [
#        "cargo", "clippy",
#        "--all-targets",
#        "--color", "always",
#    	 "--",
#    	 "-A", "clippy::bool_to_int_with_if",
#    	 "-A", "clippy::collapsible_if",
#    	 "-A", "clippy::derive_partial_eq_without_eq",
#    ]
# need_stdout = false
[jobs.clippy-all]
command = [
    "cargo", "clippy",
    "--all-features",
    "--all-targets",
    "--color", "always",
]
need_stdout = false

# This job lets you run
# - all tests: bacon test
# - a specific test: bacon test -- config::test_default_files
# - the tests of a package: bacon test -- -- -p config
[jobs.test]
command = [
    "cargo", "test", "--color", "always",
    "--", "--color", "always", # see https://github.com/Canop/bacon/issues/124
]
need_stdout = true

[jobs.doc]
command = ["cargo", "doc", "--color", "always", "--no-deps"]
need_stdout = false

# If the doc compiles, then it opens in your browser and bacon switches
# to the previous job
[jobs.doc-open]
command = ["cargo", "doc", "--color", "always", "--no-deps", "--open"]
need_stdout = false
on_success = "back" # so that we don't open the browser at each change

# You can run your application and have the result displayed in bacon,
# *if* it makes sense for this crate.
# Don't forget the `--color always` part or the errors won't be
# properly parsed.
# If your program never stops (eg a server), you may set `background`
# to false to have the cargo run output immediately displayed instead
# of waiting for program's end.
[jobs.run]
command = [
    "cargo", "run",
    "--color", "always",
    # put launch parameters for your program behind a `--` separator
]
need_stdout = true
allow_warnings = true
background = true

# This parameterized job runs the example of your choice, as soon
# as the code compiles.
# Call it as
#    bacon ex -- my-example
[jobs.ex]
command = ["cargo", "run", "--color", "always", "--example"]
need_stdout = true
allow_warnings = true

# You may define here keybindings that would be specific to
# a project, for example a shortcut to launch a specific job.
# Shortcuts to internal functions (scrolling, toggling, etc.)
# should go in your personal global prefs.toml file instead.
[keybindings]
# alt-m = "job:my-job"
c = "job:clippy-all" # comment this to have 'c' run clippy on only the default target

Changes to examples/rmall.rs.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#[cfg(feature = "enumerate")]
mod inner {
  use std::env;

  use fsblobstore::FsBlobStore;

  pub(super) fn main() {
    //
    // Set up datastore base directory
    //
    let curdir = env::current_dir().unwrap();
    let datastoredir = curdir.join("datastore");
    #[cfg(not(feature = "mkbasedir"))]
    if !datastoredir.exists() {
      std::fs::create_dir_all(&datastoredir).unwrap();
    }

    let bs = FsBlobStore::new(datastoredir).unwrap();

    //
    // Enumerate all keys in content store
    //
    let (rx, jh) = bs.enumerate();

    let mut keys = Vec::new();
    while let Some(ch) = rx.recv().unwrap() {
      println!("{}", ch);

      keys.push(ch);
    }

    jh.join().unwrap();

    for key in keys {






|



















|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#[cfg(feature = "enumerate")]
mod inner {
  use std::env;

  use fsblobstore::FsBlobStore;

  pub fn main() {
    //
    // Set up datastore base directory
    //
    let curdir = env::current_dir().unwrap();
    let datastoredir = curdir.join("datastore");
    #[cfg(not(feature = "mkbasedir"))]
    if !datastoredir.exists() {
      std::fs::create_dir_all(&datastoredir).unwrap();
    }

    let bs = FsBlobStore::new(datastoredir).unwrap();

    //
    // Enumerate all keys in content store
    //
    let (rx, jh) = bs.enumerate();

    let mut keys = Vec::new();
    while let Some(ch) = rx.recv().unwrap() {
      println!("{ch}");

      keys.push(ch);
    }

    jh.join().unwrap();

    for key in keys {

Changes to examples/simple.rs.

36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

#[cfg(feature = "enumerate")]
mod enumerate {
  use super::FsBlobStore;
  use fsblobstore::ContentHash;
  use sha2::{Digest, Sha256};

  pub(super) fn enum_hashes(bs: &FsBlobStore) {
    let (rx, jh) = bs.enumerate();
    while let Some(ch) = rx.recv().unwrap() {
      println!("{}", ch);

      let reader = bs.reader(&ch).unwrap();
      let reader = Box::new(reader);

      read_and_verify(reader, &ch);
    }








|


|







36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

#[cfg(feature = "enumerate")]
mod enumerate {
  use super::FsBlobStore;
  use fsblobstore::ContentHash;
  use sha2::{Digest, Sha256};

  pub fn enum_hashes(bs: &FsBlobStore) {
    let (rx, jh) = bs.enumerate();
    while let Some(ch) = rx.recv().unwrap() {
      println!("{ch}");

      let reader = bs.reader(&ch).unwrap();
      let reader = Box::new(reader);

      read_and_verify(reader, &ch);
    }

Changes to src/ch.rs.

1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
use std::{
  fmt,
  hash::{Hash, Hasher},
  ops::Deref,
  str::FromStr
};

/// A hash of a content blob.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
pub struct ContentHash(Vec<u8>);

impl ContentHash {

  pub fn into_inner(self) -> Vec<u8> {
    self.0
  }
}

impl Hash for ContentHash {
  fn hash<H: Hasher>(&self, state: &mut H) {
    self.0.hash(state);
  }
}

impl From<Vec<u8>> for ContentHash {
  fn from(vec: Vec<u8>) -> Self {
    assert_eq!(vec.len(), 32);
    ContentHash(vec)
  }
}

impl Deref for ContentHash {
  type Target = [u8];
  fn deref(&self) -> &Self::Target {
    &self.0
  }
}

impl fmt::Display for ContentHash {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    let hexhash = hex::encode(&self.0);
    write!(f, "{}", hexhash)
  }
}

impl FromStr for ContentHash {
  type Err = ();

  fn from_str(s: &str) -> Result<Self, Self::Err> {
    match hex::decode(s) {
      Ok(buf) => Ok(ContentHash::from(buf)),
      Err(_) => Err(())
    }
  }
}

// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 :













>














|













|







|
<
<
<




1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51



52
53
54
55
use std::{
  fmt,
  hash::{Hash, Hasher},
  ops::Deref,
  str::FromStr
};

/// A hash of a content blob.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
pub struct ContentHash(Vec<u8>);

impl ContentHash {
  #[must_use]
  pub fn into_inner(self) -> Vec<u8> {
    self.0
  }
}

impl Hash for ContentHash {
  fn hash<H: Hasher>(&self, state: &mut H) {
    self.0.hash(state);
  }
}

impl From<Vec<u8>> for ContentHash {
  fn from(vec: Vec<u8>) -> Self {
    assert_eq!(vec.len(), 32);
    Self(vec)
  }
}

impl Deref for ContentHash {
  type Target = [u8];
  fn deref(&self) -> &Self::Target {
    &self.0
  }
}

impl fmt::Display for ContentHash {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    let hexhash = hex::encode(&self.0);
    write!(f, "{hexhash}")
  }
}

impl FromStr for ContentHash {
  type Err = ();

  fn from_str(s: &str) -> Result<Self, Self::Err> {
    hex::decode(s).map_or(Err(()), |buf| Ok(Self::from(buf)))



  }
}

// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 :

Changes to src/err.rs.

1
2
3
4
5
6
7
8
9

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
use std::{fmt, io};

#[derive(Debug)]
pub enum Error {
  BadFormat(String),
  IO(String)
}

impl Error {

  pub fn bad_format<S: ToString>(s: S) -> Self {
    Error::BadFormat(s.to_string())
  }
}

impl std::error::Error for Error {}

impl From<io::Error> for Error {
  fn from(err: io::Error) -> Self {
    Error::IO(err.to_string())
  }
}

impl fmt::Display for Error {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {
      Error::BadFormat(s) => {
        write!(f, "Bad format error; {}", s)
      }
      Error::IO(s) => {
        write!(f, "I/O error; {}", s)
      }
    }
  }
}

// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 :





|



>
|
|







|






<
|
<
<
|
<





1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

27


28

29
30
31
32
33
use std::{fmt, io};

#[derive(Debug)]
pub enum Error {
  BadFormat(String),
  IO(io::Error)
}

impl Error {
  #[allow(clippy::needless_pass_by_value)]
  pub fn bad_format(s: impl ToString) -> Self {
    Self::BadFormat(s.to_string())
  }
}

impl std::error::Error for Error {}

impl From<io::Error> for Error {
  fn from(err: io::Error) -> Self {
    Self::IO(err)
  }
}

impl fmt::Display for Error {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {

      Self::BadFormat(s) => write!(f, "Bad format error; {s}"),


      Self::IO(e) => write!(f, "I/O error; {e}")

    }
  }
}

// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 :

Changes to src/lib.rs.

24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

#[cfg(feature = "enumerate")]
use {
  std::{path::Component, thread},
  walkdir::WalkDir
};

use idbag::IdBag;

use tmpfile::TmpProc;

use sha2::{Digest, Sha256};

pub use ch::ContentHash;
pub use tmpfile::{self, TmpFile};

pub use err::Error;


/// Internal type used by the [`TmpFile`] to hash and move blobs into their
/// final location.
struct Hasher {
  inner: Sha256,
  _id: idbag::Id
}

impl TmpProc for Hasher {
  type Output = ContentHash;


  /// Called when a buffer is about to be written.
  fn update(&mut self, buf: &[u8]) {
    self.inner.update(buf);
  }

  fn finalize(
    &mut self,
    tmpfile: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), std::io::Error> {
    let result = self.inner.clone().finalize();
    let hash = result.to_vec();

    let fname = if let Some(tmpfile) = tmpfile {
      let Some(basedir) = tmpfile.parent() else {
        panic!("foo");
      };

      let hexhash = hex::encode(&hash);
      let (subdir1, rest) = hexhash.split_at(2);
      let (subdir2, fname) = rest.split_at(2);
      let subdirs = basedir.join(subdir1).join(subdir2);
      if !subdirs.exists() {
        std::fs::create_dir_all(&subdirs)?;
      }
      Some(subdirs.join(fname))
    } else {
      None
    };
    Ok((ContentHash::from(hash), fname))
  }
}


/// An abstraction over a blob storage in a file system directory.
pub struct FsBlobStore {
  basedir: PathBuf,

  minsize: Option<usize>,

  /// Used to allocate unique identifiers for naming temporary files.
  idbag: IdBag
}

impl FsBlobStore {
  fn fsparts(hexhash: &str) -> (&str, &str, &str) {
    let (subdir1, rest) = hexhash.split_at(2);
    let (subdir2, fname) = rest.split_at(2);








|















|




>









|











|
|
|

|















|







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

#[cfg(feature = "enumerate")]
use {
  std::{path::Component, thread},
  walkdir::WalkDir
};

use idbag::IdBagU32;

use tmpfile::TmpProc;

use sha2::{Digest, Sha256};

pub use ch::ContentHash;
pub use tmpfile::{self, TmpFile};

pub use err::Error;


/// Internal type used by the [`TmpFile`] to hash and move blobs into their
/// final location.
struct Hasher {
  inner: Sha256,
  _id: idbag::IdU32
}

impl TmpProc for Hasher {
  type Output = ContentHash;
  type Error = Error;

  /// Called when a buffer is about to be written.
  fn update(&mut self, buf: &[u8]) {
    self.inner.update(buf);
  }

  fn finalize(
    &mut self,
    tmpfile: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), Self::Error> {
    let result = self.inner.clone().finalize();
    let hash = result.to_vec();

    let fname = if let Some(tmpfile) = tmpfile {
      let Some(basedir) = tmpfile.parent() else {
        panic!("foo");
      };

      let hexhash = hex::encode(&hash);
      let (subdir1, rest) = hexhash.split_at(2);
      let (subdir2, fname) = rest.split_at(2);
      let dir = basedir.join(subdir1).join(subdir2);
      if !dir.exists() {
        std::fs::create_dir_all(&dir)?;
      }
      Some(dir.join(fname))
    } else {
      None
    };
    Ok((ContentHash::from(hash), fname))
  }
}


/// An abstraction over a blob storage in a file system directory.
pub struct FsBlobStore {
  basedir: PathBuf,

  minsize: Option<usize>,

  /// Used to allocate unique identifiers for naming temporary files.
  idbag: IdBagU32
}

impl FsBlobStore {
  fn fsparts(hexhash: &str) -> (&str, &str, &str) {
    let (subdir1, rest) = hexhash.split_at(2);
    let (subdir2, fname) = rest.split_at(2);

119
120
121
122
123
124
125
126





127
128
129
130
131
132
133
134
135
136
137
138
139
140
141











142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161




162
163
164
165
166
167



168
169
170
171
172
173
174
175
176
177
178





179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
impl FsBlobStore {
  /// Create a new file system-backed blob storage engine.
  ///
  /// The `basedir` is where the blobs and temporary files will be stored.  The
  /// caller must ensure that either `basedir` is absolute, or that the path
  /// remains valid throughout the object's lifetime.
  ///
  /// If the basedir does not exist, the





  pub fn new(basedir: impl AsRef<Path>) -> Result<Self, Error> {
    let basedir = basedir.as_ref();

    #[cfg(feature = "mkbasedir")]
    if !basedir.exists() {
      fs::create_dir_all(basedir)?;
    }

    Ok(Self {
      basedir: basedir.to_path_buf(),
      minsize: None,
      idbag: IdBag::new()
    })
  }












  pub fn with_minsize(
    basedir: impl AsRef<Path>,
    minsize: usize
  ) -> Result<Self, Error> {
    let basedir = basedir.as_ref();

    #[cfg(feature = "mkbasedir")]
    if !basedir.exists() {
      fs::create_dir_all(basedir)?;
    }

    Ok(Self {
      basedir: basedir.to_path_buf(),
      minsize: Some(minsize),
      idbag: IdBag::new()
    })
  }


  /// Check if content for a hash exists in store.




  pub fn have(&self, hash: &[u8]) -> Result<bool, std::io::Error> {
    let fname = self.abspathname(hash);
    fname.try_exists()
  }

  /// Get a reader for a blob.



  pub fn reader(
    &self,
    hash: &[u8]
  ) -> Result<impl std::io::Read, std::io::Error> {
    let fname = self.abspathname(hash);
    fs::File::open(fname)
  }

  /// Return a writer for writing to temporary file.
  ///
  /// If the caller wishes to keep





  pub fn writer(&self) -> Result<TmpFile<ContentHash>, std::io::Error> {
    let id = self.idbag.alloc();
    let tmpfname = format!("tmp-{:08x}", id.val());
    let tp = Hasher {
      inner: Sha256::new(),
      _id: id
    };
    let tmpfname = self.basedir.join(tmpfname);
    if let Some(minsize) = self.minsize {
      TmpFile::with_minsize(tmpfname, Box::new(tp), minsize)
    } else {
      TmpFile::new(tmpfname, Box::new(tp))
    }
  }

  /// Remove a blob from the blob store.
  ///



  /// # Panic
  /// If the `hash` is not 32 bytes long this method will panic.
  pub fn rm(&self, hash: &[u8]) -> Result<(), std::io::Error> {
    let fname = self.abspathname(hash);

    fs::remove_file(&fname)?;

    let Some(subdir) = fname.parent() else {
      panic!("Unexpectedly unable to get parent directory.");
    };
    let Ok(_) = fs::remove_dir(subdir) else {
      // Assume there are other files in this directory
      return Ok(());
    };

    let Some(subdir) = subdir.parent() else {
      panic!("Unexpectedly unable to get parent directory.");
    };
    let Ok(_) = fs::remove_dir(subdir) else {
      // Assume there are other directories in this directory
      return Ok(());
    };

    Ok(())
  }








|
>
>
>
>
>











|



>
>
>
>
>
>
>
>
>
>
>














|





>
>
>
>






>
>
>








|

|
>
>
>
>
>
|

|












|

>
>
>
|









|







|







120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
impl FsBlobStore {
  /// Create a new file system-backed blob storage engine.
  ///
  /// The `basedir` is where the blobs and temporary files will be stored.  The
  /// caller must ensure that either `basedir` is absolute, or that the path
  /// remains valid throughout the object's lifetime.
  ///
  /// If the basedir does not exist, it will automatically be created if the
  /// `mkbasedir` feature is enabled.
  ///
  /// # Errors
  /// If `mkbasedir` feature is enabled, [`Error::IO`] indicates that the base
  /// directory can not be created.
  pub fn new(basedir: impl AsRef<Path>) -> Result<Self, Error> {
    let basedir = basedir.as_ref();

    #[cfg(feature = "mkbasedir")]
    if !basedir.exists() {
      fs::create_dir_all(basedir)?;
    }

    Ok(Self {
      basedir: basedir.to_path_buf(),
      minsize: None,
      idbag: IdBagU32::new()
    })
  }

  /// This function serves the purpose as [`FsBlobStore::new()`], but will
  /// enable support for storing small files in memory, rather than be written
  /// to disk.
  ///
  /// # Notes
  /// If support for storing small files in memory is enabled, "files" that
  /// will fall into this category will not actually be stored in the file
/// system, and thus will neither be enumerable nor read.
  ///
/// The calling application must maintain its own database for such cases.
  #[allow(clippy::missing_errors_doc)]
  pub fn with_minsize(
    basedir: impl AsRef<Path>,
    minsize: usize
  ) -> Result<Self, Error> {
    let basedir = basedir.as_ref();

    #[cfg(feature = "mkbasedir")]
    if !basedir.exists() {
      fs::create_dir_all(basedir)?;
    }

    Ok(Self {
      basedir: basedir.to_path_buf(),
      minsize: Some(minsize),
      idbag: IdBagU32::new()
    })
  }


  /// Check if content for a hash exists in store.
  ///
  /// # Errors
  /// [`Error::IO`] indicates that it was not possible to determine whether the
  /// file exists.
  pub fn have(&self, hash: &[u8]) -> Result<bool, std::io::Error> {
    let fname = self.abspathname(hash);
    fname.try_exists()
  }

  /// Get a reader for a blob.
  ///
  /// # Errors
  /// [`Error::IO`] means the file could not be opened.
  pub fn reader(
    &self,
    hash: &[u8]
  ) -> Result<impl std::io::Read, std::io::Error> {
    let fname = self.abspathname(hash);
    fs::File::open(fname)
  }

  /// Return a [`TmpFile`] writer for writing to temporary file.
  ///
  /// If the caller wishes to keep the file it must call `TmpFile::persist()`.
  /// Dropping the `TmpFile`, without persisting it, will remove the temporary
  /// file.
  ///
  /// # Errors
  /// `std::io::Error` indicates that the temporary file could not be created.
  pub fn writer(&self) -> Result<TmpFile<ContentHash, Error>, std::io::Error> {
    let id = self.idbag.alloc();
    let tmpfname = format!("tmp-{:08x}", id.get());
    let tp = Hasher {
      inner: Sha256::new(),
      _id: id
    };
    let tmpfname = self.basedir.join(tmpfname);
    if let Some(minsize) = self.minsize {
      TmpFile::with_minsize(tmpfname, Box::new(tp), minsize)
    } else {
      TmpFile::new(tmpfname, Box::new(tp))
    }
  }

  /// Remove a blob, by its hash, from the blob store.
  ///
  /// # Errors
  /// `std::io::Error` indicates the file could not be removed.
  ///
  /// # Panics
  /// If the `hash` is not 32 bytes long this method will panic.
  pub fn rm(&self, hash: &[u8]) -> Result<(), std::io::Error> {
    let fname = self.abspathname(hash);

    fs::remove_file(&fname)?;

    let Some(subdir) = fname.parent() else {
      panic!("Unexpectedly unable to get parent directory.");
    };
    let Ok(()) = fs::remove_dir(subdir) else {
      // Assume there are other files in this directory
      return Ok(());
    };

    let Some(subdir) = subdir.parent() else {
      panic!("Unexpectedly unable to get parent directory.");
    };
    let Ok(()) = fs::remove_dir(subdir) else {
      // Assume there are other directories in this directory
      return Ok(());
    };

    Ok(())
  }

234
235
236
237
238
239
240


241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

262
263
264
265
266
267
268
  /// limited to infrequent integrity checks.
  ///
  /// This method will launch a background thread which lives as long as it
  /// performs its work.  It is inadvisable to allow end users to trigger this
  /// method to be run.
  #[cfg(feature = "enumerate")]
  #[cfg_attr(docsrs, doc(cfg(feature = "enumerate")))]


  pub fn enumerate(
    &self
  ) -> (recstrm::Receiver<ContentHash, ()>, thread::JoinHandle<()>) {
    let (tx, rx) = recstrm::channel::<ContentHash, ()>(32, None);
    let basedir = self.basedir.clone();
    let jh = thread::spawn(move || {
      // Send hashes in batches
      let mut batch = Vec::with_capacity(16);
      for entry in WalkDir::new(&basedir).into_iter().filter_map(|e| e.ok()) {
        // Only care about entries of depth 3 (<subdir1>/<subdir2>/<file>)
        if entry.depth() != 3 {
          continue;
        }

        // Only care about regular files
        if !entry.file_type().is_file() {
          continue;
        }

        // Strip base directory from path
        let pth = entry.path();

        let pth = pth.strip_prefix(&basedir).unwrap();

        // Construct a string from path components
        // Ignore any paths that have components that are not utf-8, and
        // ignore components that aren't "normal".
        let mut p = String::with_capacity(64);
        for c in pth.components() {







>
>








|












>







266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
  /// limited to infrequent integrity checks.
  ///
  /// This method will launch a background thread which lives as long as it
  /// performs its work.  It is inadvisable to allow end users to trigger this
  /// method to be run.
  #[cfg(feature = "enumerate")]
  #[cfg_attr(docsrs, doc(cfg(feature = "enumerate")))]
  #[allow(clippy::missing_panics_doc)]
  #[must_use]
  pub fn enumerate(
    &self
  ) -> (recstrm::Receiver<ContentHash, ()>, thread::JoinHandle<()>) {
    let (tx, rx) = recstrm::channel::<ContentHash, ()>(32, None);
    let basedir = self.basedir.clone();
    let jh = thread::spawn(move || {
      // Send hashes in batches
      let mut batch = Vec::with_capacity(16);
      for entry in WalkDir::new(&basedir).into_iter().filter_map(Result::ok) {
        // Only care about entries of depth 3 (<subdir1>/<subdir2>/<file>)
        if entry.depth() != 3 {
          continue;
        }

        // Only care about regular files
        if !entry.file_type().is_file() {
          continue;
        }

        // Strip base directory from path
        let pth = entry.path();
        // unwrap() should be okay, because path was constructed from basedir
        let pth = pth.strip_prefix(&basedir).unwrap();

        // Construct a string from path components
        // Ignore any paths that have components that are not utf-8, and
        // ignore components that aren't "normal".
        let mut p = String::with_capacity(64);
        for c in pth.components() {
293
294
295
296
297
298
299

300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320




321
322
323
324
325
326
327
328
329
330
        }

        // unwrap() is okay, since it should have been sufficiently
        // validated above
        let hash = hex::decode(p).unwrap();

        batch.push(ContentHash::from(hash));

        if batch.len() >= 16 && tx.send_batch(batch.drain(..)).is_err() {
          break;
        }
      }
      if !batch.is_empty() {
        let _ = tx.send_batch(batch.drain(..));
      }
    });

    (rx, jh)
  }

  /// Get complete filename of an existing blob.
  ///
  /// Returns `Ok(PathBuf)` containing the path to the content, if it exists.
  ///
  /// # Caveat
  /// The use of this method is strongly discouraged.  Use
  /// `FsBlobStore::have()` to check if a blob exists in the datastore,
  /// `FsBlobStore::reader()` to read a blob, and `FsBlobStore::rm()` to remove
  /// a blob.




  #[cfg(feature = "get-fname")]
  #[cfg_attr(docsrs, doc(cfg(feature = "get-fname")))]
  pub fn get_fname(&self, hash: &[u8]) -> Result<PathBuf, std::io::Error> {
    let fname = self.abspathname(hash);
    fs::metadata(&fname)?;
    Ok(fname)
  }
}

// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 :







>





|















>
>
>
>










328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
        }

        // unwrap() is okay, since it should have been sufficiently
        // validated above
        let hash = hex::decode(p).unwrap();

        batch.push(ContentHash::from(hash));
        #[allow(clippy::iter_with_drain)]
        if batch.len() >= 16 && tx.send_batch(batch.drain(..)).is_err() {
          break;
        }
      }
      if !batch.is_empty() {
        let _ = tx.send_batch(batch.into_iter());
      }
    });

    (rx, jh)
  }

  /// Get complete filename of an existing blob.
  ///
  /// Returns `Ok(PathBuf)` containing the path to the content, if it exists.
  ///
  /// # Caveat
  /// The use of this method is strongly discouraged.  Use
  /// `FsBlobStore::have()` to check if a blob exists in the datastore,
  /// `FsBlobStore::reader()` to read a blob, and `FsBlobStore::rm()` to remove
  /// a blob.
  ///
  /// # Errors
  /// `std::io::Error` indicates the file doesn't exist or its metadata could
  /// not be read.
  #[cfg(feature = "get-fname")]
  #[cfg_attr(docsrs, doc(cfg(feature = "get-fname")))]
  pub fn get_fname(&self, hash: &[u8]) -> Result<PathBuf, std::io::Error> {
    let fname = self.abspathname(hash);
    fs::metadata(&fname)?;
    Ok(fname)
  }
}

// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 :

Changes to www/changelog.md.

1
2


3
4
5
6
7
8
9
10
11
12











13
14
15
16
17
18
19
# Change Log



## [Unreleased]

[Details](/vdiff?from=fsblobstore-0.0.4&to=trunk)

### Added

### Changed

### Removed












---

## [0.0.4] - 2024-01-30

[Details](/vdiff?from=fsblobstore-0.0.3&to=fsblobstore-0.0.4)

### Changed


>
>


|







>
>
>
>
>
>
>
>
>
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Change Log

⚠️  indicates a breaking change.

## [Unreleased]

[Details](/vdiff?from=fsblobstore-0.0.5&to=trunk)

### Added

### Changed

### Removed

---

## [0.0.5]

[Details](/vdiff?from=fsblobstore-0.0.4&to=fsblobstore-0.0.5)

### Changed

- Update `idbag` to `0.2.0`.
- ⚠️ Update `tmpfile` to `0.0.3`.

---

## [0.0.4] - 2024-01-30

[Details](/vdiff?from=fsblobstore-0.0.3&to=fsblobstore-0.0.4)

### Changed

Changes to www/index.md.

13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28

## Feature labels in documentation

The crate's documentation uses automatically generated feature labels, which
currently requires nightly features.  To build the documentation locally use:

```
$ RUSTFLAGS="--cfg docsrs" RUSTDOCFLAGS="--cfg docsrs" \
cargo +nightly doc --all-features
```


## Change log

The details of changes can always be found in the timeline, but for a
high-level view of changes between released versions there's a manually







|
<







13
14
15
16
17
18
19
20

21
22
23
24
25
26
27

## Feature labels in documentation

The crate's documentation uses automatically generated feature labels, which
currently requires nightly features.  To build the documentation locally use:

```
$ RUSTFLAGS="--cfg docsrs" RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features

```


## Change log

The details of changes can always be found in the timeline, but for a
high-level view of changes between released versions there's a manually