// bagit/bagit_file.js

const { Constants } = require('../core/constants');
const crypto = require('crypto');
const { KeyValueCollection } = require('./key_value_collection');

/**
 * BagItFile contains metadata about a file that the bagger
 * will be packaging into a bag. This metadata includes the
 * file's absolute source path, its relative path within the
 * bag, its size and checksums, and a few other bits of data.
 *
 * If the file happens to be a tag file, manifest, or tag manifest,
 * it may have additional data stored in the keyValueCollection
 * property. That data may be written into a text file during the
 * bagging process.
 *
 * @param {string} absSourcePath - The absolute source path of the
 * file. The bagger will copy the file from this path into
 * relDestPath inside the bag.
 *
 * @param {string} relDestPath - The relative path at which this
 * file should reside within the bag. For manifests, relDestPath
 * will be inside the top-level directory of the bag. For example,
 * at 'manifest-sha256.txt'. For payload files, relDestPath will
 * have the prefix 'data/'. For example, 'data/images/photo.jpg'.
 * Some bagging profiles permit tag files in subdirectories outside
 * the payload directory; hence 'dpn-data/dpn-tags.txt' may be a
 * valid relDestPath. BagItFile infers the type of the file from
 * the relDestPath param.
 *
 * @param {object} stats - A subset of the stats gathered by Node's
 * fs.stat() function. The BagItFile object keeps only a handful of
 * properties from that result. This param can be a Node.js fs.Stats
 * object or a {@link FileStat} object.
 *
 */
class BagItFile {

    // The stats argument must expose size, uid, gid and mtime properties
    // plus isFile() and isDirectory() methods; nothing else is read from it.
    constructor(absSourcePath, relDestPath, stats) {
        /**
         * absSourcePath is the absolute source path to this file.
         * The bagger will copy the file from this path into
         * relDestPath inside the bag.
         *
         * @type {string}
         */
        this.absSourcePath = absSourcePath;
        /**
         * The relative path at which this file should reside within
         * the bag. For manifests, relDestPath will be inside the
         * top-level directory of the bag. For example, at
         * 'manifest-sha256.txt'. For payload files, relDestPath will
         * have the prefix 'data/'. For example, 'data/images/photo.jpg'.
         * Some bagging profiles permit tag files in subdirectories outside
         * the payload directory; hence 'dpn-data/dpn-tags.txt' may be a
         * valid relDestPath. BagItFile infers the type of the file from
         * the relDestPath param.
         *
         * @type {string}
         */
        this.relDestPath = relDestPath;
        /**
         * size is the size, in bytes, of the file.
         *
         * @type {number}
         */
        this.size = stats.size;
        /**
         * uid is the id of the user who owns the file.
         *
         * @type {number}
         */
        this.uid = stats.uid;
        /**
         * gid is the id of the group that owns this file.
         *
         * @type {number}
         */
        this.gid = stats.gid;
        /**
         * mtime is the time this file was last modified.
         *
         * @type {Date}
         */
        this.mtime = stats.mtime;
        /**
         * isFile will be true if this is a regular file.
         * It will be false if this is a directory, socket, or link.
         *
         * @type {boolean}
         */
        this.isFile = stats.isFile();
        /**
         * isDirectory will be true if this is a directory.
         *
         * @type {boolean}
         */
        this.isDirectory = stats.isDirectory();
        /**
         * fileType is one of 'manifest', 'tagmanifest', 'payload', or 'tagfile',
         * based on relDestPath. File types are defined in Constants.FILE_TYPES.
         * This value is computed once, here in the constructor; the is*()
         * methods below re-derive the type from the current relDestPath
         * instead of reading this property.
         *
         * @type {string}
         */
        this.fileType = BagItFile.getFileType(relDestPath);
        /**
         * checksums contains a hash of fixity values we calculate on
         * the file's contents.
         *
         * key = algorithm name ('md5', 'sha256', etc.)
         * value = digest
         *
         * @type {Object<string, string>}
         */
        this.checksums = {};
        /**
         * keyValueCollection is used by the validator to store
         * the parsed contents of tag files and manifests.
         * It is null until something assigns a collection to it.
         *
         * @type {KeyValueCollection}
         */
        this.keyValueCollection = null;
    }

    /**
     * Returns the manifest entry for the specified algorithm,
     * or throws an exception if the checksum for that algorithm
     * is not present. The format of the returned string is suitable
     * for printing into a payload manifest or tag manifest.
     *
     * @param {string} algorithm - The algorithm of the digest to retrieve.
     *
     * @returns {string} - A manifest entry for this file, in the format
     * <digest> <relDestPath>.
     *
     * @throws {Error} If this file has no digest stored for the algorithm.
     */
    getManifestEntry(algorithm) {
        // Consult own keys only. A plain property read would also find
        // names inherited from Object.prototype ('constructor', 'toString',
        // ...) and produce a bogus manifest line instead of an error.
        const hasDigest = Object.prototype.hasOwnProperty.call(this.checksums, algorithm);
        const checksum = hasDigest ? this.checksums[algorithm] : null;
        // `== null` deliberately matches both null and undefined.
        if (checksum == null) {
            throw new Error(`No ${algorithm} digest for ${this.absSourcePath}`);
        }
        return `${checksum} ${this.relDestPath}`;
    }

    /**
     * Returns true if this is a payload file.
     *
     * @returns {boolean}
     */
    isPayloadFile() {
        return BagItFile.getFileType(this.relDestPath) === Constants.PAYLOAD_FILE;
    }

    /**
     * Returns true if this is a payload manifest.
     *
     * @returns {boolean}
     */
    isPayloadManifest() {
        return BagItFile.getFileType(this.relDestPath) === Constants.PAYLOAD_MANIFEST;
    }

    /**
     * Returns true if this is a tag file.
     *
     * @returns {boolean}
     */
    isTagFile() {
        return BagItFile.getFileType(this.relDestPath) === Constants.TAG_FILE;
    }

    /**
     * Returns true if this is a tag manifest.
     *
     * @returns {boolean}
     */
    isTagManifest() {
        return BagItFile.getFileType(this.relDestPath) === Constants.TAG_MANIFEST;
    }

    /**
     * This returns a crypto hash that will add a hex digest to this
     * BagItFile's checksums object upon completion. For example, adding
     * a crypto hash with algorithm 'sha512' will result in
     * bagItFile.checksums['sha512'] being populated with the file's
     * sha512 digest after you pipe the file's contents through the hash
     * object. It's up to the caller to pipe the data through and to
     * end the stream; the digest is recorded when the hash emits 'finish'.
     *
     * @param {string} algorithm - The hash digest algorithm to calculate.
     * For example, 'md5', 'sha256', 'sha512', etc.
     *
     * @param {function} [done] - A callback to call when hashing is complete.
     * It is skipped if it is not a function. The callback will be given
     * data with the format:
     *
     * @example
     *
     * { absSourcePath: <bagItFile.absSourcePath>,
     *   relDestPath: <bagItFile.relDestPath>,
     *   algorithm: <hashing algorithm>,
     *   digest: <message digest as hex string>
     * }
     *
     * @returns {Hash} - The object created by crypto.createHash(algorithm),
     * with its output encoding set to 'hex'.
     */
    getCryptoHash(algorithm, done) {
        const hash = crypto.createHash(algorithm);
        // The paths are captured now, so the callback reports the values
        // this object had when the hash was requested.
        const cbData = {
            absSourcePath: this.absSourcePath,
            relDestPath: this.relDestPath,
            algorithm: algorithm,
        };
        hash.setEncoding('hex');
        // 'finish' is only emitted after the stream has been ended, so the
        // digest can be read directly without ending the stream again.
        hash.on('finish', () => {
            const digest = hash.read();
            this.checksums[algorithm] = digest;
            cbData.digest = digest;
            if (typeof done === 'function') {
                done(cbData);
            }
        });
        return hash;
    }

    /**
     * getFileType returns the type of BagIt file based on relDestPath.
     * File types are defined in Constants.FILE_TYPES and include
     * 'manifest', 'tagmanifest', 'payload', and 'tagfile'.
     *
     * The decision is made purely on the path's prefix: 'data/' marks a
     * payload file, 'manifest-' a payload manifest, 'tagmanifest-' a tag
     * manifest. Every other path is treated as a tag file.
     *
     * @param {string} relDestPath - The relative path, within the bag,
     * of the file. For example, 'data/images/photo.jpg' or 'manifest-sha256.txt'.
     *
     * @returns {string}
     */
    static getFileType(relDestPath) {
        if (relDestPath.startsWith('data/')) {
            return Constants.PAYLOAD_FILE;
        } else if (relDestPath.startsWith('manifest-')) {
            return Constants.PAYLOAD_MANIFEST;
        } else if (relDestPath.startsWith('tagmanifest-')) {
            return Constants.TAG_MANIFEST;
        }
        return Constants.TAG_FILE;
    }
}


module.exports.BagItFile = BagItFile;