// core/package_operation.js

const { Context } = require('./context');
const fs = require('fs');
const { LegacyBags } = require('../util/legacy_bags')
const { OperationResult } = require('./operation_result');
const { PluginManager } = require('../plugins/plugin_manager');
const { Util } = require('./util');

/**
 * PackageOperation contains information describing a number of files
 * to be packaged, what format they should be packed into, and where the
 * final output should be stored.
 *
 */
class PackageOperation {
    /**
     * Creates a new PackageOperation.
     *
     * @param {string} packageName - The name of the package to create.
     * Typically, this is the name of a bag, excluding any '.tar', '.zip'
     * or other file format extension.
     *
     * @param {string} outputPath - The absolute path to which the package
     * should be written.
     */
    constructor(packageName, outputPath) {
        /**
         * The name of the package. Usually, this will be a bag name,
         * and should not include a file extension.
         *
         * @type {string}
         */
        this.packageName = packageName;
        /**
         * The absolute path to the file or directory in which to put the
         * completed package. In other words the absolute path to the
         * final .tar, .zip, or other file DART is creating. If DART is
         * writing a bag to a directory, this path should point to that
         * directory.
         *
         * @type {string}
         */
        this.outputPath = outputPath;
        /**
         * The format of the output package. E.g. '.tar', 'directory', etc.
         *
         * DART allows a single plugin to write multiple formats,
         * and several plugins may be able to write the same format.
         * Therefore, packageFormat and pluginId are specified
         * separately.
         *
         * {@link PluginManager} will return a list of plugins that write
         * various formats. Just call:
         *
         * <code>
         * PluginManager.getModuleCollection('FormatWriter')
         * </code>
         *
         * See also {@link pluginId}
         *
         * @type {string}
         */
        this.packageFormat = null;
        /**
         * The serialization format of the bag to be produced. This applies
         * only when creating bags. Some profiles include an acceptSerialization
         * attribute describing what types of serialization are allowed.
         * Types typically include 'application/tar', 'application/zip', etc.
         *
         * @type {string}
         */
        this.bagItSerialization = '';
        /**
         * The id (UUID) of the plugin that will write the output package.
         *
         * DART allows a single plugin to write multiple formats,
         * and several plugins may be able to write the same format.
         * Therefore, packageFormat and pluginId are specified
         * separately.
         *
         * {@link PluginManager} will return a list of plugins that write
         * various formats. Just call:
         *
         * <code>
         * PluginManager.getModuleCollection('FormatWriter')
         * </code>
         *
         * See also {@link packageFormat}
         *
         * @type {string}
         */
        this.pluginId = null;
        /**
         * A list of files DART will be packaging. Each entry in this list
         * should be an absolute path to a file or directory.
         *
         * @type {Array<string>}
         */
        this.sourceFiles = [];
        /**
         * This describes the result of DART's attempt to package the files.
         *
         * @type {OperationResult}
         */
        this.result = null;
        /**
         * The total size, in bytes, of the files to be packaged.
         *
         * @type {number}
         */
        this.payloadSize = 0;
        /**
         * This is a list of patterns that DART should skip when creating this
         * package. Any files matching these patterns will not be packaged.
         *
         * See {@link Constants.RE_DOT_FILES} and
         * {@link Constants.RE_MAC_JUNK_FILES}.
         *
         * @type {Array<string>}
         */
        this.skipFiles = [];
        /**
         * This indicates whether DART should trim common leading paths
         * when packaging files. Use the method {@link trimLeadingPaths}
         * instead of accessing this directly.
         *
         * Let's assume you have the following in {@link sourceFiles}:
         *
         * * '/path/to/some/dir/photos'
         * * '/path/to/some/dir/audios'
         * * '/path/to/some/dir/videos'
         *
         * These all have a common leading path of '/path/to/some/dir',
         * which can be stripped off in the packaging process. So setting
         * _trimLeadingPaths to true would lead to these files being bagged
         * as:
         *
         * * 'data/photos'
         * * 'data/audios'
         * * 'data/videos'
         *
         * If _trimLeadingPaths is false, these will be bagged as:
         *
         * * 'data/path/to/some/dir/photos'
         * * 'data/path/to/some/dir/audios'
         * * 'data/path/to/some/dir/videos'
         *
         * Note that trimLeadingPaths is useless if the files in {@link
         * sourceFiles} have no common leading path.
         *
         * @type {boolean}
         */
        this._trimLeadingPaths = true;
        /**
         * Contains information describing validation errors. Key is the
         * name of the invalid field. Value is a description of why the
         * field is not valid.
         *
         * @type {Object<string, string>}
         */
        this.errors = {};
    }

    /**
     * validate returns true or false, indicating whether this object
     * contains complete and valid data. If it returns false, check
     * the errors property for specific errors.
     *
     * The errors object is reset on every call. If sourceFiles is not
     * an array, that is reported as a validation error rather than
     * thrown. When some source files do not exist on disk, the
     * "missing files" message replaces any earlier message stored under
     * the 'PackageOperation.sourceFiles' key.
     *
     * @returns {boolean}
     */
    validate() {
        this.errors = {};
        if (typeof this.packageName !== 'string' || Util.isEmpty(this.packageName)) {
            this.errors['PackageOperation.packageName'] = 'Package name is required.';
        }
        if (typeof this.outputPath !== 'string' || Util.isEmpty(this.outputPath)) {
            this.errors['PackageOperation.outputPath'] = 'Output path is required.';
        }
        const isList = Array.isArray(this.sourceFiles);
        if (!isList || Util.isEmptyStringArray(this.sourceFiles)) {
            this.errors['PackageOperation.sourceFiles'] = 'Specify at least one file or directory to package.';
        }
        // Only walk the list when it really is a list; iterating null or
        // undefined would throw instead of producing a validation error.
        const missingFiles = isList
            ? this.sourceFiles.filter((sourceFile) => !fs.existsSync(sourceFile))
            : [];
        if (missingFiles.length > 0) {
            this.errors['PackageOperation.sourceFiles'] = Context.y18n.__('The following files are missing: %s', missingFiles.join('; '));
        }
        return Object.keys(this.errors).length === 0;
    }

    /**
     * This removes items from the sourceFiles array if they no longer
     * exist on disk. We need to do that to prevent errors when the job
     * runs, as well as rendering errors in the UI. It logs items that
     * it removes. The array is modified in place.
     *
     * This won't prune the list if the PackageOperation has already
     * been completed, because in that case, we want a record of what
     * the operation actually did package. It's also expected that some
     * source files will be deleted after a job has run.
     *
     * Warning: The sourceFiles list should contain absolute paths.
     * Relative paths may be trimmed even if they do exist, because
     * they may have been added from a different working directory.
     * Use absolute paths only.
     */
    pruneSourceFilesUnlessJobCompleted() {
        if (this.result && this.result.succeeded()) {
            return;
        }
        if (!Array.isArray(this.sourceFiles) || this.sourceFiles.length === 0) {
            return;
        }
        // Iterate backwards, starting at the LAST valid index, so that
        // splicing out an entry doesn't shift the entries we have yet
        // to visit.
        for (let i = this.sourceFiles.length - 1; i >= 0; i--) {
            const filepath = this.sourceFiles[i];
            if (!fs.existsSync(filepath)) {
                Context.logger.info(`Removing ${filepath} from items to be packaged into ${this.packageName} because it no longer exists on the filesystem.`);
                this.sourceFiles.splice(i, 1);
            }
        }
    }

    /**
     * This returns a new instance of the plugin that will write
     * the package, constructed with this operation's outputPath.
     * Note that each time you call this, you'll get a new writer.
     *
     * If this PackageOperation has no pluginId, or an ID that does not
     * match any known plugin, this returns null.
     *
     * @returns {Plugin}
     */
    getWriter() {
        if (!this.pluginId) {
            return null;
        }
        const writerClass = PluginManager.findById(this.pluginId);
        // An unknown id yields no class; honor the documented contract
        // and return null instead of throwing from `new`.
        if (!writerClass) {
            return null;
        }
        return new writerClass(this.outputPath);
    }

    /**
     * Returns true or false to describe whether we should trim leading
     * path names when packaging files. Currently, only the bagger respects
     * this. This simply returns the value of {@link _trimLeadingPaths},
     * except in cases where DART thinks the user is rebagging a legacy
     * bag that was first produced with DART 1.0 and then deposited into
     * APTrust's production repo. For those bags (of which only about 30
     * exist), this always returns false.
     *
     * @returns {boolean}
     */
    trimLeadingPaths() {
        if (LegacyBags.includes(this.packageName)) {
            return false;
        }
        return this._trimLeadingPaths;
    }


    /**
     * This converts the JSON representation of a PackageOperation to a
     * full-fledged PackageOperation object with all of the expected methods.
     *
     * @param {Object} data - A JavaScript hash.
     *
     * @returns {PackageOperation}
     */
    static inflateFrom(data) {
        const op = new PackageOperation();
        // tlpFunction works around a problem where jobs saved
        // prior to January 2020 have trimLeadingPaths saved
        // as a boolean in the JSON database. It should be a
        // function after January 2020. We don't want the bool
        // copied in by Object.assign to override the function,
        // so we capture the method here and put it back below.
        const tlpFunction = op.trimLeadingPaths;
        Object.assign(op, data);
        if (data.result) {
            op.result = OperationResult.inflateFrom(data.result);
        }
        op.trimLeadingPaths = tlpFunction;
        return op;
    }
}

// Export the class as a named property of this module's exports, so
// callers can pull it out by name, e.g.
// `const { PackageOperation } = require(...)`.
module.exports.PackageOperation = PackageOperation;