Index: Cargo.toml ================================================================== --- Cargo.toml +++ Cargo.toml @@ -1,8 +1,8 @@ [package] name = "tmpfile" -version = "0.0.2" +version = "0.0.3" edition = "2021" license = "0BSD" categories = [ "filesystem" ] keywords = [ "tmpfile", "tempfile" ] repository = "https://repos.qrnch.tech/pub/tmpfile" @@ -10,19 +10,21 @@ rust-version = "1.56" exclude = [ ".fossil-settings", ".efiles", ".fslckout", + "examples", "www", + "bacon.toml", "rustfmt.toml" ] [features] defer-persist = ["dep:swctx"] [dependencies] -swctx = { version = "0.2.2", optional = true } +swctx = { version = "0.3.0", optional = true } [dev-dependencies] hex = { version = "0.4.3" } rand = { version = "0.8.5" } sha2 = { version = "0.10.8" } @@ -29,5 +31,11 @@ [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs", "--generate-link-to-definition"] +[lints.clippy] +all = { level = "deny", priority = -1 } +pedantic = { level = "warn", priority = -1 } +nursery = { level = "warn", priority = -1 } +cargo = { level = "warn", priority = -1 } + ADDED bacon.toml Index: bacon.toml ================================================================== --- /dev/null +++ bacon.toml @@ -0,0 +1,103 @@ +# This is a configuration file for the bacon tool +# +# Bacon repository: https://github.com/Canop/bacon +# Complete help on configuration: https://dystroy.org/bacon/config/ +# You can also check bacon's own bacon.toml file +# as an example: https://github.com/Canop/bacon/blob/main/bacon.toml + +default_job = "clippy-all" + +[jobs.check] +command = ["cargo", "check", "--color", "always"] +need_stdout = false + +[jobs.check-all] +command = ["cargo", "check", "--all-targets", "--color", "always"] +need_stdout = false + +# Run clippy on the default target +[jobs.clippy] +command = [ + "cargo", "clippy", + "--all-features", + "--color", "always", +] +need_stdout = false + +# Run clippy on all targets +# To disable some lints, you may change the job this 
way: +# [jobs.clippy-all] +# command = [ +# "cargo", "clippy", +# "--all-targets", +# "--color", "always", +# "--", +# "-A", "clippy::bool_to_int_with_if", +# "-A", "clippy::collapsible_if", +# "-A", "clippy::derive_partial_eq_without_eq", +# ] +# need_stdout = false +[jobs.clippy-all] +command = [ + "cargo", "clippy", + "--all-features", + "--all-targets", + "--color", "always", +] +need_stdout = false + +# This job lets you run +# - all tests: bacon test +# - a specific test: bacon test -- config::test_default_files +# - the tests of a package: bacon test -- -- -p config +[jobs.test] +command = [ + "cargo", "test", "--color", "always", + "--", "--color", "always", # see https://github.com/Canop/bacon/issues/124 +] +need_stdout = true + +[jobs.doc] +command = ["cargo", "doc", "--color", "always", "--no-deps"] +need_stdout = false + +# If the doc compiles, then it opens in your browser and bacon switches +# to the previous job +[jobs.doc-open] +command = ["cargo", "doc", "--color", "always", "--no-deps", "--open"] +need_stdout = false +on_success = "back" # so that we don't open the browser at each change + +# You can run your application and have the result displayed in bacon, +# *if* it makes sense for this crate. +# Don't forget the `--color always` part or the errors won't be +# properly parsed. +# If your program never stops (eg a server), you may set `background` +# to false to have the cargo run output immediately displayed instead +# of waiting for program's end. +[jobs.run] +command = [ + "cargo", "run", + "--color", "always", + # put launch parameters for your program behind a `--` separator +] +need_stdout = true +allow_warnings = true +background = true + +# This parameterized job runs the example of your choice, as soon +# as the code compiles. 
+# Call it as +# bacon ex -- my-example +[jobs.ex] +command = ["cargo", "run", "--color", "always", "--example"] +need_stdout = true +allow_warnings = true + +# You may define here keybindings that would be specific to +# a project, for example a shortcut to launch a specific job. +# Shortcuts to internal functions (scrolling, toggling, etc.) +# should go in your personal global prefs.toml file instead. +[keybindings] +# alt-m = "job:my-job" +c = "job:clippy-all" # comment this to have 'c' run clippy on only the default target Index: examples/defer.rs ================================================================== --- examples/defer.rs +++ examples/defer.rs @@ -9,43 +9,55 @@ use tmpfile::{TmpFile, TmpProc}; use sha2::{Digest, Sha256}; use rand::Rng; + + #[derive(Debug)] + enum MyError { + IO(()) + } + + impl From for MyError { + fn from(_err: std::io::Error) -> Self { + Self::IO(()) + } + } struct Hasher { inner: Sha256 } impl TmpProc for Hasher { type Output = Vec; + type Error = MyError; /// Called when a buffer is about to be written. 
fn update(&mut self, buf: &[u8]) { self.inner.update(buf); } fn finalize( &mut self, _tmpfile: Option<&Path> - ) -> Result<(Self::Output, Option), std::io::Error> { + ) -> Result<(Self::Output, Option), Self::Error> { let result = self.inner.clone().finalize(); let hash = result.to_vec(); let hexhash = hex::encode(&hash); let (subdir1, rest) = hexhash.split_at(2); let (subdir2, fname) = rest.split_at(2); - let subdirs = PathBuf::from(subdir1).join(subdir2); - if !subdirs.exists() { - std::fs::create_dir_all(&subdirs)?; + let dir = PathBuf::from(subdir1).join(subdir2); + if !dir.exists() { + std::fs::create_dir_all(&dir)?; } - Ok((hash, Some(subdirs.join(fname)))) + Ok((hash, Some(dir.join(fname)))) } } - pub(super) fn main() { + pub fn main() { let mut buf = vec![0u8; 65536]; rand::thread_rng().fill(&mut buf[..]); let tp = Hasher { inner: Sha256::new() Index: examples/minsize.rs ================================================================== --- examples/minsize.rs +++ examples/minsize.rs @@ -6,40 +6,52 @@ use tmpfile::{TmpFile, TmpProc}; use sha2::{Digest, Sha256}; use rand::Rng; + +#[derive(Debug)] +enum MyError { + IO(()) +} + +impl From for MyError { + fn from(_err: std::io::Error) -> Self { + Self::IO(()) + } +} struct Hasher { inner: Sha256 } impl TmpProc for Hasher { type Output = Vec; + type Error = MyError; /// Called when a buffer is about to be written. fn update(&mut self, buf: &[u8]) { self.inner.update(buf); } fn finalize( &mut self, tmpfile: Option<&Path> - ) -> Result<(Self::Output, Option), std::io::Error> { + ) -> Result<(Self::Output, Option), Self::Error> { let result = self.inner.clone().finalize(); let hash = result.to_vec(); if tmpfile.is_some() { // The temporary file is stored in the file system. // We need to return a target location for it. 
let hexhash = hex::encode(&hash); let (subdir1, rest) = hexhash.split_at(2); let (subdir2, fname) = rest.split_at(2); - let subdirs = PathBuf::from(subdir1).join(subdir2); - if !subdirs.exists() { - std::fs::create_dir_all(&subdirs)?; + let dir = PathBuf::from(subdir1).join(subdir2); + if !dir.exists() { + std::fs::create_dir_all(&dir)?; } - Ok((hash, Some(subdirs.join(fname)))) + Ok((hash, Some(dir.join(fname)))) } else { // The content is stored in memory Ok((hash, None)) } } Index: examples/simple.rs ================================================================== --- examples/simple.rs +++ examples/simple.rs @@ -6,38 +6,50 @@ use tmpfile::{TmpFile, TmpProc}; use sha2::{Digest, Sha256}; use rand::Rng; + +#[derive(Debug)] +enum MyError { + IO(()) +} + +impl From for MyError { + fn from(_err: std::io::Error) -> Self { + Self::IO(()) + } +} struct Hasher { inner: Sha256 } impl TmpProc for Hasher { type Output = Vec; + type Error = MyError; /// Called when a buffer is about to be written. fn update(&mut self, buf: &[u8]) { self.inner.update(buf); } fn finalize( &mut self, _tmpfile: Option<&Path> - ) -> Result<(Self::Output, Option), std::io::Error> { + ) -> Result<(Self::Output, Option), Self::Error> { let result = self.inner.clone().finalize(); let hash = result.to_vec(); let hexhash = hex::encode(&hash); let (subdir1, rest) = hexhash.split_at(2); let (subdir2, fname) = rest.split_at(2); - let subdirs = PathBuf::from(subdir1).join(subdir2); - if !subdirs.exists() { - std::fs::create_dir_all(&subdirs)?; + let dir = PathBuf::from(subdir1).join(subdir2); + if !dir.exists() { + std::fs::create_dir_all(&dir)?; } - Ok((hash, Some(subdirs.join(fname)))) + Ok((hash, Some(dir.join(fname)))) } } fn main() { Index: src/lib.rs ================================================================== --- src/lib.rs +++ src/lib.rs @@ -1,21 +1,61 @@ -//! Representation of a temporary file that can be persisted. -//! -//! 
This is intended to be used in an application that needs to receive a file -//! to persistent storage, but doesn't know its final target file name until it -//! has received the entire file (such as if the file's name should be its -//! content's hash). -//! -//! # Minimum size -//! In some cases an application may not want to store small files in its -//! filesystem based data store. For this purpose, the `TmpFile` can be set up -//! to have a minimum file size. If a `TmpFile` does not reach this size -//! before being persisted, a buffer of the file's contents will be returned -//! instead of a file name of the persisted file. +//! Temporary file object that can be persisted. +//! +//! # Usage overview +//! The main type of this crate is [`TmpFile`], which represents a (temporary) +//! file being written to, that may optionally be persisted (or it will be +//! removed). +//! +//! To use `TmpFile` an application must first implement the [`TmpProc`] trait +//! on a type. [`TmpProc::update()`] will be called each time a block of data +//! is written to the `TmpFile`. [`TmpProc::finalize()`] will be called if the +//! application chooses to persist the `TmpFile`. +//! +//! When a `TmpFile` is created, the application must pass along the file's +//! _temporary_ location and an object whose type implements `TmpProc`. The +//! instantiated `TmpFile` object implements [`std::io::Write`], which is used +//! to write data to it. +//! +//! If the entire file can not be completed, the `TmpFile` object is dropped, +//! which will automatically remove the temporary file. +//! +//! If the entire file has been written, and the application wants to persist +//! it, it calls [`TmpFile::persist()`]. This will call the +//! `TmpProc::finalize()` trait method, whose responsibility it is to return +//! the file's persistent location (and application-defined data). Information +//! about the persisted file is finally returned to the application via an +//! instantiation of [`Persisted`].
+//! +//! # "Small file" special case +//! An application may not want to store small files in its filesystem. +//! For this purpose, the `TmpFile` can be set up to have a minimum file size. +//! If a `TmpFile` does not reach this size before being persisted, a memory +//! buffer of the file's contents will be returned instead of a file name of +//! the persisted file. //! //! The [`TmpFile::with_minsize()`] factory method can be used to use this //! feature. +//! +//! # Deferred persist +//! There may be cases where it's impractical to call [`TmpFile::persist()`] on +//! a `TmpFile`, but where the originator of the `TmpFile` wants to manage the +//! results from the `TmpFile` when it is persisted. This crate has means to +//! handle such situations, but it drastically changes the semantics of +//! `TmpFile`: The `defer_persist()` method returns a wait context that can be +//! used to wait for the `TmpFile` to finalize and send its results. In this +//! scenario, the finalization occurs implicitly when the `TmpFile` is +//! dropped. +//! +//! This means that deferred persist shifts the default assumption of +//! "drop-before-persist implies failure" to "drop means persist", with no +//! means to trigger "abort without finalization" (unless the `TmpProc`'s +//! finalization is able to determine that the file is incomplete). +//! +//! # Features +//! | Feature | Function +//! |-----------------|---------- +//! | `defer-persist` | Allow `Drop` to finalize `TmpFile`. #![cfg_attr(docsrs, feature(doc_cfg))] use std::{ fs, @@ -23,13 +63,18 @@ path::{Path, PathBuf}, time::{Duration, Instant} }; -/// Used to inspect content as it is being fed to the temporary file. +/// Used to inspect contents as it is being fed to the temporary file and to +/// finalize the temporary file when it is being persisted. pub trait TmpProc { + /// Application-defined data to be returned on successful finalization. type Output; + + /// Application-defined error type.
+ type Error; /// Called when a buffer has been written to the `TmpFile` storage. fn update(&mut self, buf: &[u8]); /// Called when the application has chosen to persist the file. @@ -40,31 +85,35 @@ /// and the implementation of this method should return, as the second /// tuple member, `Some(PathBuf)`, pointing out the target file that the /// temporary file should be persisted to. If `src` is `None` the /// temporary buffer is not stored in the file system and thus `None` /// should be returned instead. + /// + /// # Errors + /// Returns application-specific errors. fn finalize( &mut self, src: Option<&Path> - ) -> Result<(Self::Output, Option), std::io::Error>; + ) -> Result<(Self::Output, Option), Self::Error>; } /// A [`TmpProc`] implementation which does nothing. pub struct NullProc<'a>(&'a Path); impl TmpProc for NullProc<'_> { type Output = (); + type Error = (); #[allow(unused_variables)] fn update(&mut self, buf: &[u8]) {} #[allow(unused_variables)] fn finalize( &mut self, src: Option<&Path> - ) -> Result<(Self::Output, Option), std::io::Error> { + ) -> Result<(Self::Output, Option), Self::Error> { Ok(((), Some(self.0.to_path_buf()))) } } /// Temporary file contents container returned after successful persist. @@ -79,33 +128,51 @@ /// This variant can only occur if a minimum size threshold has been set. Buf(Vec) } impl Output { - pub fn try_into_fname(self) -> Result { + /// Fallibly convert `Output` to a `PathBuf`. + /// + /// # Errors + /// If the `Output` does not represent a file name, then return the `Output`. + pub fn try_into_fname(self) -> Result { match self { Self::File(fname) => Ok(fname), - r => Err(r) + r @ Self::Buf(_) => Err(r) } } + /// Unwrap `PathBuf`. + /// + /// # Panics + /// The `Output` must represent a file name. 
+ #[must_use] pub fn unwrap_fname(self) -> PathBuf { - let Output::File(fname) = self else { + let Self::File(fname) = self else { panic!("Not a file name"); }; fname } - pub fn try_into_buf(self) -> Result, Output> { + /// Fallibly convert `Output` to a buffer. + /// + /// # Errors + /// If the `Output` does not represent a buffer, then return the `Output`. + pub fn try_into_buf(self) -> Result, Self> { match self { Self::Buf(buf) => Ok(buf), - r => Err(r) + r @ Self::File(_) => Err(r) } } + /// Unwrap buffer. + /// + /// # Panics + /// The `Output` must represent a buffer. + #[must_use] pub fn unwrap_buf(self) -> Vec { - let Output::Buf(buf) = self else { + let Self::Buf(buf) = self else { panic!("Not a buffer"); }; buf } } @@ -138,24 +205,30 @@ struct MemBuf { buf: Vec, idx: usize } -/// Temporary file contents generator. -pub struct TmpFile { +/// File writer used to write to a temporary file that can be persisted. +pub struct TmpFile +where + E: From +{ tmpfile: PathBuf, f: Option>, - tp: Box + Send>, + tp: Box + Send>, size: u64, start_time: Instant, membuf: Option, #[cfg(feature = "defer-persist")] - sctx: Option, (), std::io::Error>> + sctx: Option, (), E>> } -impl TmpFile { - fn inner_persist(&mut self) -> Result, std::io::Error> { +impl TmpFile +where + E: From +{ + fn inner_persist(&mut self) -> Result, E> { // Force close file, if open if let Some(f) = self.f.take() { drop(f); } @@ -188,20 +261,27 @@ Ok(Persisted { output, size: self.size, procres: t, - duration: Instant::now() - self.start_time + duration: self.start_time.elapsed() }) } } -impl TmpFile { +impl TmpFile +where + E: From +{ /// Create a new [`TmpFile`]. + /// + /// # Errors + /// If the temporary file could not be opened for writing `std::io::Error` is + /// returned. pub fn new

( fname: P, - tp: Box + Send> + tp: Box + Send> ) -> Result where P: AsRef { let tmpfile = fname.as_ref().to_path_buf(); @@ -219,13 +299,17 @@ }) } /// Create a new [`TmpFile`] that will not write to file unless the size /// exceeds a specified size. + /// + /// # Errors + /// If the temporary file could not be opened for writing `std::io::Error` is + /// returned. pub fn with_minsize

( fname: P, - tp: Box + Send>, + tp: Box + Send>, minsize: usize ) -> Result where P: AsRef { @@ -252,27 +336,29 @@ /// Persist the hitherto temporary file. /// /// The location of the persisted file will be determined by the [`TmpProc`] /// object that was passed into [`TmpFile::new()`]. + /// + /// # Errors + /// If it was not possible to persist, the application-defined error `E` will + /// be returned. #[cfg_attr( feature = "defer-persist", doc = r#" -# Panic -If the `TmpFile` has previously registered to receive the finalization -results via a channel using [`TmpFile::defer_persist()`] this method will -cause a panic. +# Panics +If the `defer-persist` feature is used: If the `TmpFile` has previously +registered to receive the finalization results via a channel using +[`TmpFile::defer_persist()`] this method will cause a panic. "# )] - pub fn persist(mut self) -> Result, std::io::Error> { + pub fn persist(mut self) -> Result, E> { #[cfg(feature = "defer-persist")] - if self.sctx.is_some() { - panic!( - "Con not persist TmpFile that has been configured for deferred \ - persist" - ); - } + assert!( + self.sctx.is_none(), + "Con not persist TmpFile that has been configured for deferred persist" + ); self.inner_persist() } /// Persist this temporary file on `Drop`, but report the finalized results @@ -285,21 +371,20 @@ /// When using a deferred persist, the semantics of the `TmpFile` changes /// from "assume failure" to "assume success". If an error occurs which /// should cause the temporary file to no longer be persisted, the owner of /// the `TmpFile` must call [`TmpFile::cancel()`] on it. /// - /// # Panic + /// # Panics /// This method must only be called once per `TmpFile` object. Calling it /// a second time will cause a panic. 
#[cfg(feature = "defer-persist")] #[cfg_attr(docsrs, doc(cfg(feature = "defer-persist")))] - pub fn defer_persist( - &mut self - ) -> swctx::WaitCtx, (), std::io::Error> { - if self.sctx.is_some() { - panic!("TmpFile already configured for deferred persist"); - } + pub fn defer_persist(&mut self) -> swctx::WaitCtx, (), E> { + assert!( + self.sctx.is_none(), + "TmpFile already configured for deferred persist" + ); let (sctx, wctx) = swctx::mkpair(); self.sctx = Some(sctx); @@ -315,11 +400,14 @@ let _ = self.sctx.take(); } } -impl Write for TmpFile { +impl Write for TmpFile +where + E: From +{ fn write(&mut self, buf: &[u8]) -> Result { // If there's a memory buffer, then append to it. Unless this write would // overflow the membuf, in which case switch to using a file. if let Some(ref mut membuf) = self.membuf { if membuf.idx + buf.len() > membuf.buf.len() { @@ -353,10 +441,12 @@ return Ok(buf.len()); } } + // At this point the Tmpfile is writing to a file in the file system. The + // memory buffer case should have returned early. let Some(ref mut f) = self.f else { panic!("No file?"); }; let n = f.write(buf)?; @@ -372,11 +462,14 @@ }; Ok(()) } } -impl Drop for TmpFile { +impl Drop for TmpFile +where + E: From +{ fn drop(&mut self) { // Close file if it hasn't been already if let Some(f) = self.f.take() { drop(f); } @@ -383,12 +476,18 @@ // If deferred persist has been requested, then handle it here. #[cfg(feature = "defer-persist")] if let Some(sctx) = self.sctx.take() { match self.inner_persist() { - Ok(res) => sctx.set(res), - Err(e) => sctx.fail(e) + Ok(res) => { + // ToDo: Log error? + let _ = sctx.set(res); + } + Err(e) => { + // ToDo: Log error? + let _ = sctx.fail(e); + } } } if let Err(_e) = fs::remove_file(&self.tmpfile) { // ToDo: Log error? 
Index: www/changelog.md ================================================================== --- www/changelog.md +++ www/changelog.md @@ -1,10 +1,28 @@ # Change Log ## [Unreleased] -[Details](/vdiff?from=tmpfile-0.0.1&to=trunk) +[Details](/vdiff?from=tmpfile-0.0.2&to=trunk) + +### Added + +### Changed + +- Previously several error cases returned `std::io::Error`. Now use an + application-specific error type: The `TmpProc` trait now has an `Error` + type, and `TmpFile` uses a generic `E`, which has a `From<std::io::Error>` + bound. +- Update to `swctx` `0.3.0`. + +### Removed + +--- + +## [0.0.2] - 2024-01-30 + +[Details](/vdiff?from=tmpfile-0.0.1&to=tmpfile-0.0.2) ### Added - Allow a "minimum file content size" to be specified. If, at the point of persisting a `TmpFile` the total size of it is smaller than the configured Index: www/index.md ================================================================== --- www/index.md +++ www/index.md @@ -1,11 +1,25 @@ # tmpfile `TmpFile` is an object representing a temporary file that can optionally be -persisted. It can be useful if a file being received over the network should -be named according to its contents, but the name can not be determined until -the entire file has been received. +persisted to a new location. + +This library was created to provide a prepackaged solution where: + +- An application wants to write a new (temporary) file, but wants it to be + automatically removed if not explicitly requested to persist. (For instance, + receive a file over network, and automatically remove it if the connection is + lost before the entire file could be received). +- The contents of a file are inspected as it is being received. (For instance, + while receiving a file over a network, calculate the file's hash as it is + being received). +- An application may want the file to be written to a temporary location, and + only moved to its permanent name once the entire file has been received.
+ (For instance, the name of the file may be its hash, which is unavailable + until the entire file has been processed). +- A small file isn't written to the file system at all, and is returned as a + memory buffer instead. ## Feature labels in documentation The crate's documentation uses automatically generated feature labels, which