tmpfile

Check-in Differences
Login

Check-in Differences

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Difference From tmpfile-0.0.2 To tmpfile-0.0.3

2024-09-21
10:49
Change log. Leaf check-in: 61d5a6e682 user: jan tags: trunk
10:44
Exclude example and bacon.toml from packaging. check-in: 9812603c60 user: jan tags: tmpfile-0.0.3, trunk
10:41
Update to swctx 0.3.0. check-in: 4dadb6cccc user: jan tags: trunk
2024-01-29
23:45
Update chanelog. check-in: 1d864e335e user: jan tags: trunk
23:36
Add documentation to Output type. check-in: b0dfc78142 user: jan tags: tmpfile-0.0.2, trunk
23:09
Add the ability to keep 'small' files in a memory buffer rather than write them to disk. check-in: bb9e97eabf user: jan tags: trunk

Changes to Cargo.toml.

1
2
3
4
5
6
7
8
9
10
11
12
13
14

15

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33






[package]
name = "tmpfile"
version = "0.0.2"
edition = "2021"
license = "0BSD"
categories = [ "filesystem" ]
keywords = [ "tmpfile", "tempfile" ]
repository = "https://repos.qrnch.tech/pub/tmpfile"
description = "A temporary file writer with an option to persist."
rust-version = "1.56"
exclude = [
  ".fossil-settings",
  ".efiles",
  ".fslckout",

  "www",

  "rustfmt.toml"
]

[features]
defer-persist = ["dep:swctx"]

[dependencies]
swctx = { version = "0.2.2", optional = true }

[dev-dependencies]
hex = { version = "0.4.3" }
rand = { version = "0.8.5" }
sha2 =  { version = "0.10.8" }

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"]









|











>

>







|










>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
[package]
name = "tmpfile"
version = "0.0.3"
edition = "2021"
license = "0BSD"
categories = ["filesystem"]
keywords = ["tmpfile", "tempfile"]
repository = "https://repos.qrnch.tech/pub/tmpfile"
description = "A temporary file writer with an option to persist."
# The crate uses `let ... else` (stable since Rust 1.65), and the `dep:`
# feature syntax below needs Cargo 1.60 or later, so 1.56 cannot build it.
rust-version = "1.65"
# Keep repository/tooling files and the examples out of the published package.
exclude = [
  ".fossil-settings",
  ".efiles",
  ".fslckout",
  "examples",
  "www",
  "bacon.toml",
  "rustfmt.toml",
]

[features]
defer-persist = ["dep:swctx"]

[dependencies]
swctx = { version = "0.3.0", optional = true }

[dev-dependencies]
hex = { version = "0.4.3" }
rand = { version = "0.8.5" }
sha2 = { version = "0.10.8" }

[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"]

# Lint groups get priority -1 so individual lint overrides can take precedence.
[lints.clippy]
all = { level = "deny", priority = -1 }
pedantic = { level = "warn", priority = -1 }
nursery = { level = "warn", priority = -1 }
cargo = { level = "warn", priority = -1 }

Added bacon.toml.















































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# This is a configuration file for the bacon tool
#
# Bacon repository: https://github.com/Canop/bacon
# Complete help on configuration: https://dystroy.org/bacon/config/
# You can also check bacon's own bacon.toml file
#  as an example: https://github.com/Canop/bacon/blob/main/bacon.toml

default_job = "clippy-all"

[jobs.check]
command = ["cargo", "check", "--color", "always"]
need_stdout = false

[jobs.check-all]
command = ["cargo", "check", "--all-targets", "--color", "always"]
need_stdout = false

# Run clippy on the default target
[jobs.clippy]
command = [
    "cargo", "clippy",
    "--all-features",
    "--color", "always",
]
need_stdout = false

# Run clippy on all targets
# To disable some lints, you may change the job this way:
#    [jobs.clippy-all]
#    command = [
#        "cargo", "clippy",
#        "--all-targets",
#        "--color", "always",
#        "--",
#        "-A", "clippy::bool_to_int_with_if",
#        "-A", "clippy::collapsible_if",
#        "-A", "clippy::derive_partial_eq_without_eq",
#    ]
#    need_stdout = false
[jobs.clippy-all]
command = [
    "cargo", "clippy",
    "--all-features",
    "--all-targets",
    "--color", "always",
]
need_stdout = false

# This job lets you run
# - all tests: bacon test
# - a specific test: bacon test -- config::test_default_files
# - the tests of a package: bacon test -- -- -p config
[jobs.test]
command = [
    "cargo", "test", "--color", "always",
    "--", "--color", "always", # see https://github.com/Canop/bacon/issues/124
]
need_stdout = true

[jobs.doc]
command = ["cargo", "doc", "--color", "always", "--no-deps"]
need_stdout = false

# If the doc compiles, then it opens in your browser and bacon switches
# to the previous job
[jobs.doc-open]
command = ["cargo", "doc", "--color", "always", "--no-deps", "--open"]
need_stdout = false
on_success = "back" # so that we don't open the browser at each change

# You can run your application and have the result displayed in bacon,
# *if* it makes sense for this crate.
# Don't forget the `--color always` part or the errors won't be
# properly parsed.
# If your program never stops (eg a server), you may set `background`
# to false to have the cargo run output immediately displayed instead
# of waiting for program's end.
[jobs.run]
command = [
    "cargo", "run",
    "--color", "always",
    # put launch parameters for your program behind a `--` separator
]
need_stdout = true
allow_warnings = true
background = true

# This parameterized job runs the example of your choice, as soon
# as the code compiles.
# Call it as
#    bacon ex -- my-example
[jobs.ex]
command = ["cargo", "run", "--color", "always", "--example"]
need_stdout = true
allow_warnings = true

# You may define here keybindings that would be specific to
# a project, for example a shortcut to launch a specific job.
# Shortcuts to internal functions (scrolling, toggling, etc.)
# should go in your personal global prefs.toml file instead.
[keybindings]
# alt-m = "job:my-job"
c = "job:clippy-all" # comment this to have 'c' run clippy on only the default target

Changes to examples/defer.rs.

1
2
3
4
5
6
7
8
9
10
11
12
13











14
15
16
17
18
19
20

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#[cfg(feature = "defer-persist")]
mod inner {
  use std::{
    io::Write,
    path::{Path, PathBuf},
    thread
  };

  use tmpfile::{TmpFile, TmpProc};

  use sha2::{Digest, Sha256};

  use rand::Rng;












  struct Hasher {
    inner: Sha256
  }

  impl TmpProc for Hasher {
    type Output = Vec<u8>;


    /// Called when a buffer is about to be written.
    fn update(&mut self, buf: &[u8]) {
      self.inner.update(buf);
    }

    fn finalize(
      &mut self,
      _tmpfile: Option<&Path>
    ) -> Result<(Self::Output, Option<PathBuf>), std::io::Error> {
      let result = self.inner.clone().finalize();
      let hash = result.to_vec();

      let hexhash = hex::encode(&hash);
      let (subdir1, rest) = hexhash.split_at(2);
      let (subdir2, fname) = rest.split_at(2);
      let subdirs = PathBuf::from(subdir1).join(subdir2);
      if !subdirs.exists() {
        std::fs::create_dir_all(&subdirs)?;
      }
      Ok((hash, Some(subdirs.join(fname))))
    }
  }


  pub(super) fn main() {
    let mut buf = vec![0u8; 65536];
    rand::thread_rng().fill(&mut buf[..]);

    let tp = Hasher {
      inner: Sha256::new()
    };
    let tp = Box::new(tp);













>
>
>
>
>
>
>
>
>
>
>







>









|






|
|
|

|




|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#[cfg(feature = "defer-persist")]
mod inner {
  use std::{
    io::Write,
    path::{Path, PathBuf},
    thread
  };

  use tmpfile::{TmpFile, TmpProc};

  use sha2::{Digest, Sha256};

  use rand::Rng;

  #[derive(Debug)]
  enum MyError {
    IO(())
  }

  impl From<std::io::Error> for MyError {
    fn from(_err: std::io::Error) -> Self {
      Self::IO(())
    }
  }

  struct Hasher {
    inner: Sha256
  }

  impl TmpProc for Hasher {
    type Output = Vec<u8>;
    type Error = MyError;

    /// Feed each buffer written to the `TmpFile` into the running SHA-256
    /// state.
    fn update(&mut self, buf: &[u8]) {
      self.inner.update(buf);
    }

    /// Return the content hash together with the persistent location
    /// `<aa>/<bb>/<rest-of-hex-hash>`, where `aa` and `bb` are the first two
    /// byte pairs of the hex-encoded hash.
    ///
    /// # Errors
    /// A failure to create the target directory is converted into
    /// `MyError` through its `From<std::io::Error>` implementation.
    fn finalize(
      &mut self,
      _tmpfile: Option<&Path>
    ) -> Result<(Self::Output, Option<PathBuf>), Self::Error> {
      // `finalize()` consumes the hasher, so operate on a clone.
      let hash = self.inner.clone().finalize().to_vec();

      let hexhash = hex::encode(&hash);
      let (subdir1, rest) = hexhash.split_at(2);
      let (subdir2, fname) = rest.split_at(2);
      let dir = PathBuf::from(subdir1).join(subdir2);

      // `create_dir_all()` succeeds if the directory already exists, so a
      // separate (and racy) `exists()` pre-check is unnecessary.
      std::fs::create_dir_all(&dir)?;

      Ok((hash, Some(dir.join(fname))))
    }
  }


  pub fn main() {
    let mut buf = vec![0u8; 65536];
    rand::thread_rng().fill(&mut buf[..]);

    let tp = Hasher {
      inner: Sha256::new()
    };
    let tp = Box::new(tp);

Changes to examples/minsize.rs.

1
2
3
4
5
6
7
8
9
10











11
12
13
14
15
16
17

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
use std::{
  io::Write,
  path::{Path, PathBuf}
};

use tmpfile::{TmpFile, TmpProc};

use sha2::{Digest, Sha256};

use rand::Rng;












struct Hasher {
  inner: Sha256
}

impl TmpProc for Hasher {
  type Output = Vec<u8>;


  /// Called when a buffer is about to be written.
  fn update(&mut self, buf: &[u8]) {
    self.inner.update(buf);
  }

  fn finalize(
    &mut self,
    tmpfile: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), std::io::Error> {
    let result = self.inner.clone().finalize();
    let hash = result.to_vec();
    if tmpfile.is_some() {
      // The temporary file is stored in the file system.
      // We need to return a target location for it.
      let hexhash = hex::encode(&hash);
      let (subdir1, rest) = hexhash.split_at(2);
      let (subdir2, fname) = rest.split_at(2);
      let subdirs = PathBuf::from(subdir1).join(subdir2);
      if !subdirs.exists() {
        std::fs::create_dir_all(&subdirs)?;
      }
      Ok((hash, Some(subdirs.join(fname))))
    } else {
      // The content is stored in memory
      Ok((hash, None))
    }
  }
}











>
>
>
>
>
>
>
>
>
>
>







>









|








|
|
|

|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
use std::{
  io::Write,
  path::{Path, PathBuf}
};

use tmpfile::{TmpFile, TmpProc};

use sha2::{Digest, Sha256};

use rand::Rng;

#[derive(Debug)]
enum MyError {
  IO(())
}

impl From<std::io::Error> for MyError {
  fn from(_err: std::io::Error) -> Self {
    Self::IO(())
  }
}

struct Hasher {
  inner: Sha256
}

impl TmpProc for Hasher {
  type Output = Vec<u8>;
  type Error = MyError;

  /// Feed each buffer written to the `TmpFile` into the running SHA-256
  /// state.
  fn update(&mut self, buf: &[u8]) {
    self.inner.update(buf);
  }

  /// Return the content hash and, when the contents are backed by a file,
  /// the persistent location `<aa>/<bb>/<rest-of-hex-hash>` derived from the
  /// hex-encoded hash.
  ///
  /// When `tmpfile` is `None` the contents are held in memory and no target
  /// path is returned.
  ///
  /// # Errors
  /// A failure to create the target directory is converted into `MyError`
  /// through its `From<std::io::Error>` implementation.
  fn finalize(
    &mut self,
    tmpfile: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), Self::Error> {
    // `finalize()` consumes the hasher, so operate on a clone.
    let hash = self.inner.clone().finalize().to_vec();

    if tmpfile.is_none() {
      // The content is stored in memory; there is no file to persist.
      return Ok((hash, None));
    }

    // The temporary file is stored in the file system.
    // We need to return a target location for it.
    let hexhash = hex::encode(&hash);
    let (subdir1, rest) = hexhash.split_at(2);
    let (subdir2, fname) = rest.split_at(2);
    let dir = PathBuf::from(subdir1).join(subdir2);

    // `create_dir_all()` succeeds if the directory already exists, so a
    // separate (and racy) `exists()` pre-check is unnecessary.
    std::fs::create_dir_all(&dir)?;

    Ok((hash, Some(dir.join(fname))))
  }
}

Changes to examples/simple.rs.

1
2
3
4
5
6
7
8
9
10











11
12
13
14
15
16
17

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
use std::{
  io::Write,
  path::{Path, PathBuf}
};

use tmpfile::{TmpFile, TmpProc};

use sha2::{Digest, Sha256};

use rand::Rng;












struct Hasher {
  inner: Sha256
}

impl TmpProc for Hasher {
  type Output = Vec<u8>;


  /// Called when a buffer is about to be written.
  fn update(&mut self, buf: &[u8]) {
    self.inner.update(buf);
  }

  fn finalize(
    &mut self,
    _tmpfile: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), std::io::Error> {
    let result = self.inner.clone().finalize();
    let hash = result.to_vec();

    let hexhash = hex::encode(&hash);
    let (subdir1, rest) = hexhash.split_at(2);
    let (subdir2, fname) = rest.split_at(2);
    let subdirs = PathBuf::from(subdir1).join(subdir2);
    if !subdirs.exists() {
      std::fs::create_dir_all(&subdirs)?;
    }
    Ok((hash, Some(subdirs.join(fname))))
  }
}


fn main() {
  let mut buf = vec![0u8; 65536];
  rand::thread_rng().fill(&mut buf[..]);










>
>
>
>
>
>
>
>
>
>
>







>









|






|
|
|

|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
use std::{
  io::Write,
  path::{Path, PathBuf}
};

use tmpfile::{TmpFile, TmpProc};

use sha2::{Digest, Sha256};

use rand::Rng;

#[derive(Debug)]
enum MyError {
  IO(())
}

impl From<std::io::Error> for MyError {
  fn from(_err: std::io::Error) -> Self {
    Self::IO(())
  }
}

struct Hasher {
  inner: Sha256
}

impl TmpProc for Hasher {
  type Output = Vec<u8>;
  type Error = MyError;

  /// Feed each buffer written to the `TmpFile` into the running SHA-256
  /// state.
  fn update(&mut self, buf: &[u8]) {
    self.inner.update(buf);
  }

  /// Return the content hash together with the persistent location
  /// `<aa>/<bb>/<rest-of-hex-hash>`, where `aa` and `bb` are the first two
  /// byte pairs of the hex-encoded hash.
  ///
  /// # Errors
  /// A failure to create the target directory is converted into `MyError`
  /// through its `From<std::io::Error>` implementation.
  fn finalize(
    &mut self,
    _tmpfile: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), Self::Error> {
    // `finalize()` consumes the hasher, so operate on a clone.
    let hash = self.inner.clone().finalize().to_vec();

    let hexhash = hex::encode(&hash);
    let (subdir1, rest) = hexhash.split_at(2);
    let (subdir2, fname) = rest.split_at(2);
    let dir = PathBuf::from(subdir1).join(subdir2);

    // `create_dir_all()` succeeds if the directory already exists, so a
    // separate (and racy) `exists()` pre-check is unnecessary.
    std::fs::create_dir_all(&dir)?;

    Ok((hash, Some(dir.join(fname))))
  }
}


fn main() {
  let mut buf = vec![0u8; 65536];
  rand::thread_rng().fill(&mut buf[..]);

Changes to src/lib.rs.

1
2








3




4

5

6






7
8
9
10
11
12
13
14
15
16




















17
18
19
20
21
22
23
24
25
26
27
28

29

30



31
32
33
34
35
36
37
38
39
40
41
42
43
44



45
46
47
48
49
50
51
52
53
54
55
56

57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83




84
85
86
87
88
89
90





91
92
93
94
95
96
97




98
99
100
101
102
103
104





105
106
107
108
109
110
111
112
113
//! Representation of a temporary file that can be persisted.
//!








//! This is intended to be used in an application that needs to receive a file




//! to persistent storage, but doesn't know its final target file name until it

//! has received the entire file (such as if the file's name should be its

//! content's hash).






//!
//! # Minimum size
//! In some cases an application may not want to store small files in its
//! filesystem based data store.  For this purpose, the `TmpFile` can be set up
//! to have a minimum file size.  If a `TmpFile` does not reach this size
//! before being persisted, a buffer of the file's contents will be returned
//! instead of a file name of the persisted file.
//!
//! The [`TmpFile::with_minsize()`] factory method can be used to use this
//! feature.





















#![cfg_attr(docsrs, feature(doc_cfg))]

use std::{
  fs,
  io::Write,
  path::{Path, PathBuf},
  time::{Duration, Instant}
};


/// Used to inspect content as it is being fed to the temporary file.

pub trait TmpProc {

  type Output;




  /// Called when a buffer has been written to the `TmpFile` storage.
  fn update(&mut self, buf: &[u8]);

  /// Called when the application has chosen to persist the file.
  ///
  /// The role of this method is to:
  /// - Return its application-specific data of the associated type `Output`.
  /// - If `src` is `Some()` it means that the `TmpFile` is backed by a file,
  ///   and the implementation of this method should return, as the second
  ///   tuple member, `Some(PathBuf)`, pointing out the target file that the
  ///   temporary file should be persisted to.  If `src` is `None` the
  ///   temporary buffer is not stored in the file system and thus `None`
  ///   should be returned instead.



  fn finalize(
    &mut self,
    src: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), std::io::Error>;
}


/// A [`TmpProc`] implementation which does nothing.
pub struct NullProc<'a>(&'a Path);

impl TmpProc for NullProc<'_> {
  type Output = ();


  #[allow(unused_variables)]
  fn update(&mut self, buf: &[u8]) {}

  #[allow(unused_variables)]
  fn finalize(
    &mut self,
    src: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), std::io::Error> {
    Ok(((), Some(self.0.to_path_buf())))
  }
}

/// Temporary file contents container returned after successful persist.
#[derive(Debug)]
pub enum Output {
  /// The temporary file's contents have been persisted to a file.
  File(PathBuf),

  /// The temporary file's contents weren't large enough to be written to disk
  /// and are returned in this buffer.
  ///
  /// This variant can only occur if a minimum size threshold has been set.
  Buf(Vec<u8>)
}

impl Output {




  pub fn try_into_fname(self) -> Result<PathBuf, Output> {
    match self {
      Self::File(fname) => Ok(fname),
      r => Err(r)
    }
  }






  pub fn unwrap_fname(self) -> PathBuf {
    let Output::File(fname) = self else {
      panic!("Not a file name");
    };
    fname
  }





  pub fn try_into_buf(self) -> Result<Vec<u8>, Output> {
    match self {
      Self::Buf(buf) => Ok(buf),
      r => Err(r)
    }
  }






  pub fn unwrap_buf(self) -> Vec<u8> {
    let Output::Buf(buf) = self else {
      panic!("Not a buffer");
    };
    buf
  }
}

/// The final results of successfully persisting a [`TmpFile`].
|

>
>
>
>
>
>
>
>
|
>
>
>
>
|
>
|
>
|
>
>
>
>
>
>

|
|
|
|
|
|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>











|
>

>

>
>
>














>
>
>



|








>








|


















>
>
>
>
|


|



>
>
>
>
>

|





>
>
>
>
|


|



>
>
>
>
>

|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
//! Temporary file object that can be persisted.
//!
//! # Usage overview
//! The main type of this crate is [`TmpFile`], which represents a (temporary)
//! file being written to, that may optionally be persisted (or it will be
//! removed).
//!
//! To use `TmpFile` an application must first implement the [`TmpProc`] trait
//! on a type.  [`TmpProc::update()`] will be called each time a block of data
//! is written to the `TmpFile`.  [`TmpProc::finalize()`] will be called if the
//! application chooses to persist the `TmpFile`.
//!
//! When a `TmpFile` is created, the application must pass along the file's
//! _temporary_ location and an object whose type implements `TmpProc`.  The
//! instantiated `TmpFile` object implements [`std::io::Write`], which is used
//! to write data to it.
//!
//! If the entire file can not be completed, the `TmpFile` object is dropped,
//! which will automatically remove the temporary file.
//!
//! If the entire file has been written, and the application wants to persist
//! it, it calls [`TmpFile::persist()`].  This will call the
//! `TmpProc::finalize()` trait method, whose responsibility it is to return
//! the file's persistent location (and application-defined data).  Information
//! about the persisted file is finally returned to the application via an
//! instantiation of [`Persisted`].
//!
//! # "Small file" special case
//! An application may not want to store small files in its filesystem.
//! For this purpose, the `TmpFile` can be set up to have a minimum file size.
//! If a `TmpFile` does not reach this size before being persisted, a memory
//! buffer of the file's contents will be returned instead of a file name of
//! the persisted file.
//!
//! The [`TmpFile::with_minsize()`] factory method can be used to use this
//! feature.
//!
//! # Deferred persist
//! There may be cases where it's impractical to call [`TmpFile::persist()`] on
//! a `TmpFile`, but where the originator of the `TmpFile` wants to manage the
//! results from the `TmpFile` when it is persisted.  This crate has means to
//! handle such situations, but it drastically changes the semantics of
//! `TmpFile`:  The `defer_persist()` method returns a wait context that can be
//! used to wait for the `TmpFile` to finalize and send its results.  In this
//! scenario, the finalization occurs implicitly when the `TmpFile` is
//! dropped.
//!
//! This means that deferred persist shifts the default assumption of
//! "drop-before-persist implies failure" to "drop means persist", with no
//! means to trigger "abort without finalization" (unless the `TmpProc`'s
//! finalization is able to determine that the file is incomplete).
//!
//! # Features
//! | Feature         | Function
//! |-----------------|----------
//! | `defer-persist` | Allow `Drop` to finalize `TmpFile`.

#![cfg_attr(docsrs, feature(doc_cfg))]

use std::{
  fs,
  io::Write,
  path::{Path, PathBuf},
  time::{Duration, Instant}
};


/// Hook through which an application inspects the contents being fed to the
/// temporary file and finalizes the temporary file when it is persisted.
///
/// `TmpFile` requires the associated [`TmpProc::Error`] type to implement
/// `From<std::io::Error>`.
pub trait TmpProc {
  /// Application-defined data to be returned on successful finalization.
  type Output;

  /// Application-defined error type returned when finalization fails.
  type Error;

  /// Called when a buffer has been written to the `TmpFile` storage.
  ///
  /// `buf` holds the bytes of that write.
  fn update(&mut self, buf: &[u8]);

  /// Called when the application has chosen to persist the file.
  ///
  /// The role of this method is to:
  /// - Return its application-specific data of the associated type `Output`
  ///   as the first tuple member.
  /// - If `src` is `Some()`, the `TmpFile` is backed by a file, and the
  ///   implementation should return, as the second tuple member,
  ///   `Some(PathBuf)` naming the target file that the temporary file should
  ///   be persisted to.  If `src` is `None`, the temporary buffer is not
  ///   stored in the file system and `None` should be returned instead.
  ///
  /// # Errors
  /// Returns application-specific errors.
  fn finalize(
    &mut self,
    src: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), Self::Error>;
}


/// A [`TmpProc`] implementation that ignores the written data and always
/// names the wrapped path as the persist target.
// NOTE(review): `Error = ()` does not implement `From<std::io::Error>`, which
// `TmpFile` requires of its error type -- confirm how this type is meant to
// be used together with `TmpFile`.
pub struct NullProc<'a>(&'a Path);

impl TmpProc for NullProc<'_> {
  type Output = ();
  type Error = ();

  /// Written data is not inspected.
  fn update(&mut self, _buf: &[u8]) {}

  /// Return no application data and the wrapped path as the target file,
  /// regardless of `_src`.
  fn finalize(
    &mut self,
    _src: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), Self::Error> {
    let target = self.0.to_owned();
    Ok(((), Some(target)))
  }
}

/// Where the contents of a temporary file ended up after a successful
/// persist.
#[derive(Debug)]
pub enum Output {
  /// The contents have been persisted to the file at this path.
  File(PathBuf),

  /// The contents weren't large enough to be written to disk and are handed
  /// back in this buffer instead.
  ///
  /// This variant can only occur if a minimum size threshold has been set.
  Buf(Vec<u8>)
}

impl Output {
  /// Try to extract the persisted file's path.
  ///
  /// # Errors
  /// If this `Output` holds a buffer rather than a file name, the `Output`
  /// is handed back unchanged.
  pub fn try_into_fname(self) -> Result<PathBuf, Self> {
    match self {
      Self::Buf(_) => Err(self),
      Self::File(path) => Ok(path)
    }
  }

  /// Extract the persisted file's path.
  ///
  /// # Panics
  /// Panics if this `Output` does not hold a file name.
  #[must_use]
  pub fn unwrap_fname(self) -> PathBuf {
    match self {
      Self::File(path) => path,
      Self::Buf(_) => panic!("Not a file name")
    }
  }

  /// Try to extract the in-memory contents buffer.
  ///
  /// # Errors
  /// If this `Output` holds a file name rather than a buffer, the `Output`
  /// is handed back unchanged.
  pub fn try_into_buf(self) -> Result<Vec<u8>, Self> {
    match self {
      Self::File(_) => Err(self),
      Self::Buf(bytes) => Ok(bytes)
    }
  }

  /// Extract the in-memory contents buffer.
  ///
  /// # Panics
  /// Panics if this `Output` does not hold a buffer.
  #[must_use]
  pub fn unwrap_buf(self) -> Vec<u8> {
    match self {
      Self::Buf(bytes) => bytes,
      Self::File(_) => panic!("Not a buffer")
    }
  }
}

/// The final results of successfully persisting a [`TmpFile`].
136
137
138
139
140
141
142
143
144



145
146
147
148
149
150
151
152
153
154
155



156
157
158
159
160
161
162
163


struct MemBuf {
  buf: Vec<u8>,
  idx: usize
}

/// Temporary file contents generator.
pub struct TmpFile<T> {



  tmpfile: PathBuf,
  f: Option<Box<dyn Write + Send>>,
  tp: Box<dyn TmpProc<Output = T> + Send>,
  size: u64,
  start_time: Instant,
  membuf: Option<MemBuf>,
  #[cfg(feature = "defer-persist")]
  sctx: Option<swctx::SetCtx<Persisted<T>, (), std::io::Error>>
}

impl<T> TmpFile<T> {



  fn inner_persist(&mut self) -> Result<Persisted<T>, std::io::Error> {
    // Force close file, if open
    if let Some(f) = self.f.take() {
      drop(f);
    }

    let (output, t) = if let Some(ref mut membuf) = self.membuf {
      let mut buf = std::mem::take(&mut membuf.buf);







|
|
>
>
>


|




|


|
>
>
>
|







203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236


struct MemBuf {
  buf: Vec<u8>,
  idx: usize
}

/// File writer used to write to a temporary file that can be persisted.
pub struct TmpFile<T, E>
where
  E: From<std::io::Error>
{
  tmpfile: PathBuf,
  f: Option<Box<dyn Write + Send>>,
  tp: Box<dyn TmpProc<Output = T, Error = E> + Send>,
  size: u64,
  start_time: Instant,
  membuf: Option<MemBuf>,
  #[cfg(feature = "defer-persist")]
  sctx: Option<swctx::SetCtx<Persisted<T>, (), E>>
}

impl<T, E> TmpFile<T, E>
where
  E: From<std::io::Error>
{
  fn inner_persist(&mut self) -> Result<Persisted<T>, E> {
    // Force close file, if open
    if let Some(f) = self.f.take() {
      drop(f);
    }

    let (output, t) = if let Some(ref mut membuf) = self.membuf {
      let mut buf = std::mem::take(&mut membuf.buf);
186
187
188
189
190
191
192
193
194
195
196
197
198



199




200
201
202
203
204
205
206
207
208
209
      (Output::File(outfile), t)
    };

    Ok(Persisted {
      output,
      size: self.size,
      procres: t,
      duration: Instant::now() - self.start_time
    })
  }
}

impl<T> TmpFile<T> {



  /// Create a new [`TmpFile`].




  pub fn new<P>(
    fname: P,
    tp: Box<dyn TmpProc<Output = T> + Send>
  ) -> Result<Self, std::io::Error>
  where
    P: AsRef<Path>
  {
    let tmpfile = fname.as_ref().to_path_buf();
    let f = fs::File::create(&tmpfile)?;
    let f = Box::new(f);







|




|
>
>
>

>
>
>
>


|







259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
      (Output::File(outfile), t)
    };

    Ok(Persisted {
      output,
      size: self.size,
      procres: t,
      duration: self.start_time.elapsed()
    })
  }
}

impl<T, E> TmpFile<T, E>
where
  E: From<std::io::Error>
{
  /// Create a new [`TmpFile`].
  ///
  /// # Errors
  /// If the temporary file could not be opened for writing `std::io::Error` is
  /// returned.
  pub fn new<P>(
    fname: P,
    tp: Box<dyn TmpProc<Output = T, Error = E> + Send>
  ) -> Result<Self, std::io::Error>
  where
    P: AsRef<Path>
  {
    let tmpfile = fname.as_ref().to_path_buf();
    let f = fs::File::create(&tmpfile)?;
    let f = Box::new(f);
217
218
219
220
221
222
223




224
225
226
227
228
229
230
231
232
233
      #[cfg(feature = "defer-persist")]
      sctx: None
    })
  }

  /// Create a new [`TmpFile`] that will not write to file unless the size
  /// exceeds a specified size.




  pub fn with_minsize<P>(
    fname: P,
    tp: Box<dyn TmpProc<Output = T> + Send>,
    minsize: usize
  ) -> Result<Self, std::io::Error>
  where
    P: AsRef<Path>
  {
    let tmpfile = fname.as_ref().to_path_buf();
    let f = fs::File::create(&tmpfile)?;







>
>
>
>


|







297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
      #[cfg(feature = "defer-persist")]
      sctx: None
    })
  }

  /// Create a new [`TmpFile`] that will not write to file unless the size
  /// exceeds a specified size.
  ///
  /// # Errors
  /// If the temporary file could not be opened for writing `std::io::Error` is
  /// returned.
  pub fn with_minsize<P>(
    fname: P,
    tp: Box<dyn TmpProc<Output = T, Error = E> + Send>,
    minsize: usize
  ) -> Result<Self, std::io::Error>
  where
    P: AsRef<Path>
  {
    let tmpfile = fname.as_ref().to_path_buf();
    let f = fs::File::create(&tmpfile)?;
250
251
252
253
254
255
256




257
258
259
260

261
262
263
264
265
266
267

268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320



321
322
323
324
325
326
327
  }


  /// Persist the hitherto temporary file.
  ///
  /// The location of the persisted file will be determined by the [`TmpProc`]
  /// object that was passed into [`TmpFile::new()`].




  #[cfg_attr(
    feature = "defer-persist",
    doc = r#"
# Panic

If the `TmpFile` has previously registered to receive the finalization
results via a channel using [`TmpFile::defer_persist()`] this method will
cause a panic.
"#
  )]
  pub fn persist(mut self) -> Result<Persisted<T>, std::io::Error> {
    #[cfg(feature = "defer-persist")]

    if self.sctx.is_some() {
      panic!(
        "Con not persist TmpFile that has been configured for deferred \
         persist"
      );
    }

    self.inner_persist()
  }

  /// Arrange for this temporary file to be persisted when it is dropped, and
  /// hand back a wait context on which the finalized results are reported.
  ///
  /// This is intended for cases where the `TmpFile` is type-erased into a
  /// `dyn std::io::Write`, which loses access to [`TmpFile::persist()`].
  ///
  /// # Caveat
  /// With a deferred persist the semantics of the `TmpFile` change from
  /// "assume failure" to "assume success".  If an error occurs which should
  /// prevent the temporary file from being persisted, the owner of the
  /// `TmpFile` must call [`TmpFile::cancel()`] on it.
  ///
  /// # Panics
  /// This method must only be called once per `TmpFile` object.  Calling it
  /// a second time will cause a panic.
  #[cfg(feature = "defer-persist")]
  #[cfg_attr(docsrs, doc(cfg(feature = "defer-persist")))]
  pub fn defer_persist(
    &mut self
  ) -> swctx::WaitCtx<Persisted<T>, (), std::io::Error> {
    assert!(
      self.sctx.is_none(),
      "TmpFile already configured for deferred persist"
    );

    // Keep the setter half in the `TmpFile`; the waiter half goes to the
    // caller.
    let (setter, waiter) = swctx::mkpair();
    self.sctx = Some(setter);
    waiter
  }

  /// Abandon a deferred persist request, consuming the `TmpFile`.
  #[cfg(feature = "defer-persist")]
  #[cfg_attr(docsrs, doc(cfg(feature = "defer-persist")))]
  pub fn cancel(mut self) {
    // Discard the stored SetCtx first, so that the Drop handler finds none
    // and doesn't attempt to finalize/persist.
    drop(self.sctx.take());
  }
}


impl<T> Write for TmpFile<T> {



  fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
    // If there's a memory buffer, then append to it.  Unless this write would
    // overflow the membuf, in which case switch to using a file.
    if let Some(ref mut membuf) = self.membuf {
      if membuf.idx + buf.len() > membuf.buf.len() {
        // Current write index + size of input buffer would exceed maximum
        // buffer size.







>
>
>
>



|
>
|
|
<


|

>
|
<
|
<
|
<
















|




|
|
<
|
|
<
>



















|
>
>
>







334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351

352
353
354
355
356
357

358

359

360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382

383
384

385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
  }


  /// Persist the hitherto temporary file.
  ///
  /// The location of the persisted file will be determined by the [`TmpProc`]
  /// object that was passed into [`TmpFile::new()`].
  ///
  /// # Errors
  /// If it was not possible to persist, the application-defined error `E` will
  /// be returned.
  #[cfg_attr(
    feature = "defer-persist",
    doc = r#"
# Panics
If the `TmpFile` has previously registered to receive the finalization
results via a channel using [`TmpFile::defer_persist()`] this method will
cause a panic.
"#
  )]
  pub fn persist(mut self) -> Result<Persisted<T>, E> {
    // A deferred persist reports its result from the Drop handler; requesting
    // an explicit persist as well is a caller bug, so fail loudly.
    #[cfg(feature = "defer-persist")]
    assert!(
      self.sctx.is_none(),
      "Can not persist TmpFile that has been configured for deferred persist"
    );

    self.inner_persist()
  }

  /// Arrange for this temporary file to be persisted when it is dropped, and
  /// hand back the receiving end of a one-shot channel on which the outcome
  /// of that persist will be reported.
  ///
  /// This is intended for the case where the `TmpFile` is type-erased into a
  /// `dyn std::io::Write`, at which point [`TmpFile::persist()`] can no
  /// longer be reached.
  ///
  /// # Caveat
  /// Requesting a deferred persist flips the semantics of the `TmpFile` from
  /// "assume failure" to "assume success".  If an error occurs that should
  /// prevent the temporary file from being persisted, the owner of the
  /// `TmpFile` must call [`TmpFile::cancel()`] on it.
  ///
  /// # Panics
  /// This method may be called at most once per `TmpFile` object.  A second
  /// call will panic.
  #[cfg(feature = "defer-persist")]
  #[cfg_attr(docsrs, doc(cfg(feature = "defer-persist")))]
  pub fn defer_persist(&mut self) -> swctx::WaitCtx<Persisted<T>, (), E> {
    // A stored set-context means a deferred persist was already requested.
    assert!(
      self.sctx.is_none(),
      "TmpFile already configured for deferred persist"
    );

    // Keep the setting half for the Drop handler; the caller gets the
    // waiting half.
    let (setter, waiter) = swctx::mkpair();
    self.sctx = Some(setter);
    waiter
  }

  /// Cancel a deferred persist request.
  ///
  /// Consumes the `TmpFile`.  The stored set-context is discarded first, so
  /// that when the object is dropped at the end of this call the `Drop`
  /// handler finds no deferred persist to carry out.
  #[cfg(feature = "defer-persist")]
  #[cfg_attr(docsrs, doc(cfg(feature = "defer-persist")))]
  pub fn cancel(mut self) {
    // Discard the SetCtx; without it the Drop handler will not attempt to
    // finalize/persist.
    drop(self.sctx.take());
  }
}


impl<T, E> Write for TmpFile<T, E>
where
  E: From<std::io::Error>
{
  fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
    // If there's a memory buffer, then append to it.  Unless this write would
    // overflow the membuf, in which case switch to using a file.
    if let Some(ref mut membuf) = self.membuf {
      if membuf.idx + buf.len() > membuf.buf.len() {
        // Current write index + size of input buffer would exceed maximum
        // buffer size.
351
352
353
354
355
356
357


358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377



378
379
380
381
382
383
384
385
386
387
388



389



390
391
392
393
394
395
396
397
398
399
        // Update TmpProc
        self.tp.update(buf);

        return Ok(buf.len());
      }
    }



    let Some(ref mut f) = self.f else {
      panic!("No file?");
    };

    let n = f.write(buf)?;
    self.tp.update(&buf[..n]);
    self.size += n as u64;

    Ok(n)
  }

  fn flush(&mut self) -> Result<(), std::io::Error> {
    if let Some(ref mut f) = self.f {
      f.flush()?;
    };
    Ok(())
  }
}

impl<T> Drop for TmpFile<T> {
  /// Clean up the temporary file.
  ///
  /// The file handle is closed, a pending deferred persist (if the
  /// `defer-persist` feature is enabled and one was requested) is carried
  /// out and its outcome reported, and finally removal of the temporary
  /// file is attempted on a best-effort basis.
  fn drop(&mut self) {
    // Make sure the file handle is closed before working on the file itself.
    drop(self.f.take());

    // A stored set-context means the owner asked for a deferred persist:
    // perform it now and report the outcome to the waiting side.
    #[cfg(feature = "defer-persist")]
    if let Some(reporter) = self.sctx.take() {
      let outcome = self.inner_persist();
      match outcome {
        Ok(persisted) => reporter.set(persisted),
        Err(err) => reporter.fail(err)
      }
    }

    // Best-effort removal; a failure is deliberately ignored.
    // ToDo: Log error?
    let _ = fs::remove_file(&self.tmpfile);
  }
}

// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 :







>
>



















|
>
>
>










|
>
>
>
|
>
>
>










439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
        // Update TmpProc
        self.tp.update(buf);

        return Ok(buf.len());
      }
    }

    // At this point the Tmpfile is writing to a file in the file system.  The
    // memory buffer case should have returned early.
    let Some(ref mut f) = self.f else {
      panic!("No file?");
    };

    let n = f.write(buf)?;
    self.tp.update(&buf[..n]);
    self.size += n as u64;

    Ok(n)
  }

  fn flush(&mut self) -> Result<(), std::io::Error> {
    if let Some(ref mut f) = self.f {
      f.flush()?;
    };
    Ok(())
  }
}

impl<T, E> Drop for TmpFile<T, E>
where
  E: From<std::io::Error>
{
  /// Clean up the temporary file.
  ///
  /// The file handle is closed, a pending deferred persist (if the
  /// `defer-persist` feature is enabled and one was requested) is carried
  /// out and its outcome reported, and finally removal of the temporary
  /// file is attempted on a best-effort basis.
  fn drop(&mut self) {
    // Make sure the file handle is closed before working on the file itself.
    drop(self.f.take());

    // A stored set-context means the owner asked for a deferred persist:
    // perform it now and report the outcome to the waiting side.
    #[cfg(feature = "defer-persist")]
    if let Some(reporter) = self.sctx.take() {
      let outcome = self.inner_persist();
      match outcome {
        Ok(persisted) => {
          // The result of reporting is deliberately ignored.
          // ToDo: Log error?
          let _ = reporter.set(persisted);
        }
        Err(err) => {
          // The result of reporting is deliberately ignored.
          // ToDo: Log error?
          let _ = reporter.fail(err);
        }
      }
    }

    // Best-effort removal; a failure is deliberately ignored.
    // ToDo: Log error?
    let _ = fs::remove_file(&self.tmpfile);
  }
}

// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 :

Changes to www/changelog.md.

1
2
3
4


















5
6
7
8
9
10
11
12
# Change Log

## [Unreleased]



















[Details](/vdiff?from=tmpfile-0.0.1&to=trunk)

### Added

- Allow a "minimum file content size" to be specified.  If, at the point of
  persisting a `TmpFile` the total size of it is smaller than the configured
  threshold value, the file's contents will be returned as a memory buffer
  rather than to be persisted into a file.




>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Change Log

## [Unreleased]

[Details](/vdiff?from=tmpfile-0.0.2&to=trunk)

### Added

### Changed

- Previously several error cases returned `std::io::Error`.  Now use an
  application-specific error type:  The `TmpProc` trait now has an `Error`
  type, and `TmpFile` uses a generic `E`, which has a `From<std::io::Error>`
  bound.
- Update to `swctx` `0.3.0`.

### Removed

---

## [0.0.2] - 2024-01-30

[Details](/vdiff?from=tmpfile-0.0.1&to=tmpfile-0.0.2)

### Added

- Allow a "minimum file content size" to be specified.  If, at the point of
  persisting a `TmpFile` the total size of it is smaller than the configured
  threshold value, the file's contents will be returned as a memory buffer
  rather than to be persisted into a file.

Changes to www/index.md.

1
2
3

4

5










6


7
8
9
10
11
12
13
# tmpfile

`TmpFile` is an object representing a temporary file that can optionally be

persisted.  It can be useful if a file being received over the network should

be named according to its contents, but the name can not be determined until










the entire file has been received.




## Feature labels in documentation

The crate's documentation uses automatically generated feature labels, which
currently requires nightly features.  To build the documentation locally use:




>
|
>
|
>
>
>
>
>
>
>
>
>
>
|
>
>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# tmpfile

`TmpFile` is an object representing a temporary file that can optionally be
persisted to a new location.

This library was created to provide a prepackaged solution where:

- An application wants to write a new (temporary) file, but wants it to be
  automatically removed if not explicitly requested to persist.  (For instance,
  receive a file over network, and automatically remove it if the connection is
  lost before the entire file could be received).
- The contents of a file are inspected as they are being received.  (For
  instance, while receiving a file over a network, calculate the file's hash
  as it is being received).
- An application may want the file to be written to a temporary location, and
  only moved to its permanent name once the entire file has been received.
  (For instance, the name of the file may be its hash, which is unavailable
  until the entire file has been processed).
- A small file isn't written to the file system at all, and is returned as a
  memory buffer instead.


## Feature labels in documentation

The crate's documentation uses automatically generated feature labels, which
currently requires nightly features.  To build the documentation locally use: