Index: .efiles ================================================================== --- .efiles +++ .efiles @@ -3,5 +3,6 @@ www/index.md www/changelog.md src/lib.rs examples/simple.rs examples/defer.rs +examples/minsize.rs Index: Cargo.toml ================================================================== --- Cargo.toml +++ Cargo.toml @@ -1,8 +1,8 @@ [package] name = "tmpfile" -version = "0.0.1" +version = "0.0.2" edition = "2021" license = "0BSD" categories = [ "filesystem" ] keywords = [ "tmpfile", "tempfile" ] repository = "https://repos.qrnch.tech/pub/tmpfile" Index: examples/defer.rs ================================================================== --- examples/defer.rs +++ examples/defer.rs @@ -1,83 +1,82 @@ #[cfg(feature = "defer-persist")] -use std::{ - io::Write, - path::{Path, PathBuf}, - thread -}; - -#[cfg(feature = "defer-persist")] -use tmpfile::{TmpFile, TmpProc}; - -#[cfg(feature = "defer-persist")] -use sha2::{Digest, Sha256}; - -#[cfg(feature = "defer-persist")] -use rand::Rng; - -#[cfg(feature = "defer-persist")] -struct Hasher { - inner: Sha256 -} - -#[cfg(feature = "defer-persist")] -impl TmpProc for Hasher { - type Output = Vec; - - /// Called when a buffer is about to be written. 
- fn update(&mut self, buf: &[u8]) { - self.inner.update(buf); - } - - fn finalize( - &mut self, - _tmpfile: &Path - ) -> Result<(Self::Output, PathBuf), std::io::Error> { - let result = self.inner.clone().finalize(); - let hash = result.to_vec(); - - let hexhash = hex::encode(&hash); - let (subdir1, rest) = hexhash.split_at(2); - let (subdir2, fname) = rest.split_at(2); - let subdirs = PathBuf::from(subdir1).join(subdir2); - if !subdirs.exists() { - std::fs::create_dir_all(&subdirs)?; - } - Ok((hash, subdirs.join(fname))) - } -} - - -#[cfg(feature = "defer-persist")] -fn main() { - let mut buf = vec![0u8; 65536]; - rand::thread_rng().fill(&mut buf[..]); - - let tp = Hasher { - inner: Sha256::new() - }; - let tp = Box::new(tp); - - let mut tmpf = TmpFile::new("tmpfile", tp).unwrap(); - let wctx = tmpf.defer_persist(); - - let mut writer: Box = Box::new(tmpf); - - let jh = thread::spawn(move || { - let n = writer.write(&buf).unwrap(); - assert_eq!(n, 65536); - }); - - // Wait for TmpFile to be dropped and send the results back - let (hash, fname) = wctx.wait().unwrap(); - let hexhash = hex::encode(hash); - println!("{} written to {:?}", hexhash, fname); - - jh.join().unwrap(); -} - -#[cfg(not(feature = "defer-persist"))] -fn main() { +mod inner { + use std::{ + io::Write, + path::{Path, PathBuf}, + thread + }; + + use tmpfile::{TmpFile, TmpProc}; + + use sha2::{Digest, Sha256}; + + use rand::Rng; + + struct Hasher { + inner: Sha256 + } + + impl TmpProc for Hasher { + type Output = Vec; + + /// Called when a buffer is about to be written. 
+ fn update(&mut self, buf: &[u8]) { + self.inner.update(buf); + } + + fn finalize( + &mut self, + _tmpfile: Option<&Path> + ) -> Result<(Self::Output, Option), std::io::Error> { + let result = self.inner.clone().finalize(); + let hash = result.to_vec(); + + let hexhash = hex::encode(&hash); + let (subdir1, rest) = hexhash.split_at(2); + let (subdir2, fname) = rest.split_at(2); + let subdirs = PathBuf::from(subdir1).join(subdir2); + if !subdirs.exists() { + std::fs::create_dir_all(&subdirs)?; + } + Ok((hash, Some(subdirs.join(fname)))) + } + } + + + pub(super) fn main() { + let mut buf = vec![0u8; 65536]; + rand::thread_rng().fill(&mut buf[..]); + + let tp = Hasher { + inner: Sha256::new() + }; + let tp = Box::new(tp); + + let mut tmpf = TmpFile::new("tmpfile", tp).unwrap(); + let wctx = tmpf.defer_persist(); + + let mut writer: Box = Box::new(tmpf); + + let jh = thread::spawn(move || { + let n = writer.write(&buf).unwrap(); + assert_eq!(n, 65536); + }); + + // Wait for TmpFile to be dropped and send the results back + let persisted = wctx.wait().unwrap(); + let hexhash = hex::encode(persisted.procres); + println!("{} written to {:?}", hexhash, persisted.output); + + jh.join().unwrap(); + } +} + +fn main() { + #[cfg(feature = "defer-persist")] + inner::main(); + + #[cfg(not(feature = "defer-persist"))] println!("example requires 'defer-persist' feature."); } // vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 : ADDED examples/minsize.rs Index: examples/minsize.rs ================================================================== --- /dev/null +++ examples/minsize.rs @@ -0,0 +1,75 @@ +use std::{ + io::Write, + path::{Path, PathBuf} +}; + +use tmpfile::{TmpFile, TmpProc}; + +use sha2::{Digest, Sha256}; + +use rand::Rng; + +struct Hasher { + inner: Sha256 +} + +impl TmpProc for Hasher { + type Output = Vec; + + /// Called when a buffer is about to be written. 
+ fn update(&mut self, buf: &[u8]) { + self.inner.update(buf); + } + + fn finalize( + &mut self, + tmpfile: Option<&Path> + ) -> Result<(Self::Output, Option), std::io::Error> { + let result = self.inner.clone().finalize(); + let hash = result.to_vec(); + if tmpfile.is_some() { + // The temporary file is stored in the file system. + // We need to return a target location for it. + let hexhash = hex::encode(&hash); + let (subdir1, rest) = hexhash.split_at(2); + let (subdir2, fname) = rest.split_at(2); + let subdirs = PathBuf::from(subdir1).join(subdir2); + if !subdirs.exists() { + std::fs::create_dir_all(&subdirs)?; + } + Ok((hash, Some(subdirs.join(fname)))) + } else { + // The content is stored in memory + Ok((hash, None)) + } + } +} + + +fn main() { + do_test(0); + do_test(1); + do_test(2); + do_test(3); + do_test(4); + do_test(5); +} + +fn do_test(size: usize) { + let mut buf = vec![0u8; size]; + rand::thread_rng().fill(&mut buf[..]); + + let tp = Hasher { + inner: Sha256::new() + }; + let tp = Box::new(tp); + + let mut tmpf = TmpFile::with_minsize("tmpfile", tp, 4).unwrap(); + tmpf.write_all(&buf).unwrap(); + + let persisted = tmpf.persist().unwrap(); + let hexhash = hex::encode(persisted.procres); + println!("{} written to {:?}", hexhash, persisted.output); +} + +// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 : Index: examples/simple.rs ================================================================== --- examples/simple.rs +++ examples/simple.rs @@ -21,12 +21,12 @@ self.inner.update(buf); } fn finalize( &mut self, - _tmpfile: &Path - ) -> Result<(Self::Output, PathBuf), std::io::Error> { + _tmpfile: Option<&Path> + ) -> Result<(Self::Output, Option), std::io::Error> { let result = self.inner.clone().finalize(); let hash = result.to_vec(); let hexhash = hex::encode(&hash); let (subdir1, rest) = hexhash.split_at(2); @@ -33,11 +33,11 @@ let (subdir2, fname) = rest.split_at(2); let subdirs = PathBuf::from(subdir1).join(subdir2); if !subdirs.exists() { 
std::fs::create_dir_all(&subdirs)?; } - Ok((hash, subdirs.join(fname))) + Ok((hash, Some(subdirs.join(fname)))) } } fn main() { @@ -51,11 +51,11 @@ let mut tmpf = TmpFile::new("tmpfile", tp).unwrap(); let n = tmpf.write(&buf).unwrap(); assert_eq!(n, 65536); - let (hash, fname) = tmpf.persist().unwrap(); - let hexhash = hex::encode(hash); - println!("{} written to {:?}", hexhash, fname); + let persisted = tmpf.persist().unwrap(); + let hexhash = hex::encode(persisted.procres); + println!("{} written to {:?}", hexhash, persisted.output); } // vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 : Index: src/lib.rs ================================================================== --- src/lib.rs +++ src/lib.rs @@ -1,37 +1,53 @@ //! Representation of a temporary file that can be persisted. //! //! This is intended to be used in an application that needs to receive a file -//! to persistent storage, but doesn't know its final target file name ntil it +//! to persistent storage, but doesn't know its final target file name until it //! has received the entire file (such as if the file's name should be its //! content's hash). +//! +//! # Minimum size +//! In some cases an application may not want to store small files in its +//! filesystem based data store. For this purpose, the `TmpFile` can be set up +//! to have a minimum file size. If a `TmpFile` does not reach this size +//! before being persisted, a buffer of the file's contents will be returned +//! instead of a file name of the persisted file. +//! +//! The [`TmpFile::with_minsize()`] factory method can be used to use this +//! feature. #![cfg_attr(docsrs, feature(doc_cfg))] use std::{ fs, io::Write, - path::{Path, PathBuf} + path::{Path, PathBuf}, + time::{Duration, Instant} }; /// Used to inspect content as it is being fed to the temporary file. pub trait TmpProc { type Output; - /// Called when a buffer is about to be written. + /// Called when a buffer has been written to the `TmpFile` storage. 
fn update(&mut self, buf: &[u8]); /// Called when the application has chosen to persist the file. /// - /// The implementation should return the persistent location. The persistent - /// target location must reside within the same mount-point as the temporary - /// file's location. + /// The role of this method is to: + /// - Return its application-specific data of the associated type `Output`. + /// - If `src` is `Some()` it means that the `TmpFile` is backed by a file, + /// and the implementation of this method should return, as the second + /// tuple member, `Some(PathBuf)`, pointing out the target file that the + /// temporary file should be persisted to. If `src` is `None` the + /// temporary buffer is not stored in the file system and thus `None` + /// should be returned instead. fn finalize( &mut self, - src: &Path - ) -> Result<(Self::Output, PathBuf), std::io::Error>; + src: Option<&Path> + ) -> Result<(Self::Output, Option), std::io::Error>; } /// A [`TmpProc`] implementation which does nothing. pub struct NullProc<'a>(&'a Path); @@ -43,47 +59,143 @@ fn update(&mut self, buf: &[u8]) {} #[allow(unused_variables)] fn finalize( &mut self, - src: &Path - ) -> Result<(Self::Output, PathBuf), std::io::Error> { - Ok(((), self.0.to_path_buf())) + src: Option<&Path> + ) -> Result<(Self::Output, Option), std::io::Error> { + Ok(((), Some(self.0.to_path_buf()))) + } +} + +/// Temporary file contents container returned after successful persist. +#[derive(Debug)] +pub enum Output { + /// The temporary file's contents have been persisted to a file. + File(PathBuf), + + /// The temporary file's contents weren't large enough to be written to disk + /// and are returned in this buffer. + /// + /// This variant can only occur if a minimum size threshold has been set. 
+ Buf(Vec) +} + +impl Output { + pub fn try_into_fname(self) -> Result { + match self { + Self::File(fname) => Ok(fname), + r => Err(r) + } + } + + pub fn unwrap_fname(self) -> PathBuf { + let Output::File(fname) = self else { + panic!("Not a file name"); + }; + fname + } + + pub fn try_into_buf(self) -> Result, Output> { + match self { + Self::Buf(buf) => Ok(buf), + r => Err(r) + } + } + + pub fn unwrap_buf(self) -> Vec { + let Output::Buf(buf) = self else { + panic!("Not a buffer"); + }; + buf } } + +/// The final results of successfully persisting a [`TmpFile`]. +#[non_exhaustive] +pub struct Persisted { + /// `TmpFile` output. + /// + /// If a minimum size has been set, this will be `Output::Buf()` if the size + /// is less than or equal to the minimum size. Otherwise it will be + /// `Output::File()` containing the file name of the persisted file. + /// + /// If the persisted `TmpFile` did not have a minimum file size set, the + /// output can safely be unwrapped using [`Output::unwrap_fname()`]. + pub output: Output, + + /// The size of the content written to the [`TmpFile`]. + pub size: u64, + + /// The application-defined content processor output. + pub procres: T, + + /// The amount of time that passed between initially requesting the + /// [`TmpFile`] writer and when it was finalized. + pub duration: Duration +} + + +struct MemBuf { + buf: Vec, + idx: usize +} - -/// Temporary file. +/// Temporary file contents generator.
pub struct TmpFile { tmpfile: PathBuf, f: Option>, tp: Box + Send>, + size: u64, + start_time: Instant, + membuf: Option, #[cfg(feature = "defer-persist")] - sctx: Option> + sctx: Option, (), std::io::Error>> } impl TmpFile { - fn inner_persist(&mut self) -> Result<(T, PathBuf), std::io::Error> { - // Force close file + fn inner_persist(&mut self) -> Result, std::io::Error> { + // Force close file, if open if let Some(f) = self.f.take() { drop(f); } - // Tell the content processor to finalize and return the file name of the - // persistent file. - let (t, outfile) = self.tp.finalize(&self.tmpfile)?; - - // Hard link temporary file to persistent file, unless the file exists - // already. - if !outfile.exists() { - fs::hard_link(&self.tmpfile, &outfile)?; - } - - Ok((t, outfile)) + let (output, t) = if let Some(ref mut membuf) = self.membuf { + let mut buf = std::mem::take(&mut membuf.buf); + buf.truncate(membuf.idx); + + // Contents are stored in a memory buffer, so don't pass a path and do not + // expect a path in return. + let (t, _) = self.tp.finalize(None)?; + + (Output::Buf(buf), t) + } else { + // Tell the content processor to finalize and pass in the source + // temporary file, which should instruct finalize() to return the + // persistent location of the file. + let (t, outfile) = self.tp.finalize(Some(&self.tmpfile))?; + + // ToDo: Either document this panic or return an error instead. + let outfile = outfile.expect("An output file was not specified."); + + // Hard link temporary file to persistent file, unless the file exists + // already. + if !outfile.exists() { + fs::hard_link(&self.tmpfile, &outfile)?; + } + + (Output::File(outfile), t) + }; + + Ok(Persisted { + output, + size: self.size, + procres: t, + duration: Instant::now() - self.start_time + }) } } - impl TmpFile { /// Create a new [`TmpFile`]. pub fn new

( fname: P, @@ -97,14 +209,48 @@ let f = Box::new(f); Ok(Self { tmpfile, f: Some(f), tp, + size: 0, + start_time: Instant::now(), + membuf: None, + #[cfg(feature = "defer-persist")] + sctx: None + }) + } + + /// Create a new [`TmpFile`] that will not write to file unless the size + /// exceeds a specified size. + pub fn with_minsize

( + fname: P, + tp: Box + Send>, + minsize: usize + ) -> Result + where + P: AsRef + { + let tmpfile = fname.as_ref().to_path_buf(); + let f = fs::File::create(&tmpfile)?; + let f = Box::new(f); + let membuf = MemBuf { + buf: vec![0u8; minsize], + idx: 0 + }; + let membuf = Some(membuf); + Ok(Self { + tmpfile, + f: Some(f), + tp, + size: 0, + start_time: Instant::now(), + membuf, #[cfg(feature = "defer-persist")] sctx: None }) } + /// Persist the hitherto temporary file. /// /// The location of the persisted file will be determined by the [`TmpProc`] /// object that was passed into [`TmpFile::new()`]. @@ -115,11 +261,11 @@ If the `TmpFile` has previously registered to receive the finalization results via a channel using [`TmpFile::defer_persist()`] this method will cause a panic. "# )] - pub fn persist(mut self) -> Result<(T, PathBuf), std::io::Error> { + pub fn persist(mut self) -> Result, std::io::Error> { #[cfg(feature = "defer-persist")] if self.sctx.is_some() { panic!( "Con not persist TmpFile that has been configured for deferred \ persist" @@ -146,11 +292,11 @@ /// a second time will cause a panic. #[cfg(feature = "defer-persist")] #[cfg_attr(docsrs, doc(cfg(feature = "defer-persist")))] pub fn defer_persist( &mut self - ) -> swctx::WaitCtx<(T, PathBuf), (), std::io::Error> { + ) -> swctx::WaitCtx, (), std::io::Error> { if self.sctx.is_some() { panic!("TmpFile already configured for deferred persist"); } let (sctx, wctx) = swctx::mkpair(); @@ -167,25 +313,66 @@ // Take out the SetCtx so the Drop handler doesn't attempt to // finalize/persist. let _ = self.sctx.take(); } } + impl Write for TmpFile { fn write(&mut self, buf: &[u8]) -> Result { - self.tp.update(buf); + // If there's a memory buffer, then append to it. Unless this write would + // overflow the membuf, in which case switch to using a file. 
+ if let Some(ref mut membuf) = self.membuf { + if membuf.idx + buf.len() > membuf.buf.len() { + // Current write index + size of input buffer would exceed maximum + // buffer size. + + // Open temporary file and transfer the _existing_ memory buffer to it + let f = fs::File::create(&self.tmpfile)?; + let mut f = Box::new(f); + if membuf.idx > 0 { + f.write_all(&membuf.buf[..membuf.idx])?; + } + + // Store file handle in context + self.f = Some(f); + + // Clear memory buffer + self.membuf = None; + } else { + // There's still room. Append to memory buffer. + membuf.buf[membuf.idx..(membuf.idx + buf.len())].copy_from_slice(buf); + + // Move ahead write-pointer + membuf.idx += buf.len(); + + // Update total written size + self.size += buf.len() as u64; + + // Update TmpProc + self.tp.update(buf); + + return Ok(buf.len()); + } + } + let Some(ref mut f) = self.f else { - panic!("No file"); + panic!("No file?"); }; - f.write(buf) + + let n = f.write(buf)?; + self.tp.update(&buf[..n]); + self.size += n as u64; + + Ok(n) } fn flush(&mut self) -> Result<(), std::io::Error> { - let Some(ref mut f) = self.f else { - panic!("No file"); + if let Some(ref mut f) = self.f { + f.flush()?; }; - f.flush() + Ok(()) } } impl Drop for TmpFile { fn drop(&mut self) { Index: www/changelog.md ================================================================== --- www/changelog.md +++ www/changelog.md @@ -1,10 +1,30 @@ # Change Log ## [Unreleased] +[Details](/vdiff?from=tmpfile-0.0.1&to=trunk) + ### Added + +- Allow a "minimum file content size" to be specified. If, at the point of + persisting a `TmpFile` the total size of it is smaller than the configured + threshold value, the file's contents will be returned as a memory buffer + rather than being persisted to a file. ### Changed +- TmpFile's `Write` implementation wrongly assumed that writes were complete. +- Return a `Persisted` buffer rather than a two-tuple containing the `TmpProc` + output and pathname of the persisted file.
The new struct also includes size + and a duration (between writer creation and persist). +- The `TmpProc::finalize()`'s signature has changed in order to support file + contents stored in memory. + ### Removed +--- + +## [0.0.1] - 2024-01-25 + +Initial release. +