package NOD.cli;

import NOD.cli.ExecutorFactory;
import compbio.data.sequence.FastaReader;
import compbio.data.sequence.FastaSequence;
import compbio.data.sequence.SequenceUtil;
import compbio.util.FileUtil;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.apache.log4j.PropertyConfigurator;

/* loaded from: input_file:NOD/cli/NolsPredictor.class */
public class NolsPredictor {
    /** log4j logger for this class; assigned in the static initializer. */
    private static final Logger log;
    /** Run settings handed to the constructor (work directory, input, output, format, ...). */
    private final Parameters parameters;
    /** Wrapper used to invoke the external Batchman executable for each sequence. */
    private final Batchman batchman;
    /** Prepares the per-sequence input for Batchman (see {@code formatInput} call in PredictorRunner). */
    private final BatchmanFormatter bm_formatter;
    /** Destination of the formatted prediction results; written by {@code parseResults}. */
    private final BufferedWriter resultWriter;
    // Decompiler-exposed assertion flag: true when assertions are disabled for this class.
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:NOD/cli/NolsPredictor$OutputFormat.class */
    /**
     * Level of detail of the prediction report. According to the usage text each
     * constant adds information to the one declared after it, with {@code COMPLETE}
     * being the most verbose and {@code MINIMAL} the least.
     */
    public enum OutputFormat {
        COMPLETE,
        FULL,
        MEDIUM,
        SHORT,
        MINIMAL;

        /**
         * Decompiler-generated companion of {@link #values()}.
         *
         * @return a newly allocated array containing all constants in declaration order;
         *         callers may modify it freely
         */
        public static OutputFormat[] valuesCustom() {
            // values() already hands out a private copy; the extra copy is kept so the
            // method behaves exactly as before (a fresh array on every call).
            return values().clone();
        }
    }

    /* loaded from: input_file:NOD/cli/NolsPredictor$PredictorRunner.class */
    class PredictorRunner implements Runnable {
        private final int seqnum;
        private final FastaSequence sequence;

        public PredictorRunner(FastaSequence fastaSequence, int i) {
            this.sequence = fastaSequence;
            this.seqnum = i;
        }

        @Override // java.lang.Runnable
        public void run() {
            try {
                NolsPredictor.this.bm_formatter.formatInput(this.sequence, this.seqnum);
                NolsPredictor.this.batchman.callBatchman("CurrentBatchmanInstructionsFile" + this.seqnum + ".txt");
                NolsPredictor.this.parseResults(this.sequence, this.seqnum);
            } catch (IOException e) {
                NolsPredictor.log.error(e);
            } catch (NoSuchElementException e2) {
                NolsPredictor.log.error(e2);
            }
        }
    }

    static {
        $assertionsDisabled = !NolsPredictor.class.desiredAssertionStatus();
        log = Logger.getLogger(NolsPredictor.class);
    }

    NolsPredictor(Parameters parameters) throws IOException {
        this.parameters = parameters;
        if (!$assertionsDisabled && !Batchman.isValidDirectory(parameters.workDir)) {
            throw new AssertionError();
        }
        this.bm_formatter = new BatchmanFormatter(parameters.workDir, new File(parameters.input));
        this.batchman = new Batchman(parameters.workDir, parameters.bmpath);
        this.resultWriter = new BufferedWriter(parameters.output);
    }

    /**
     * Formats the prediction for one sequence and appends it to the result writer.
     *
     * <p>The prediction is read from the file
     * {@code PatternFile_<i>_net4000run1_predictions.res} in the work directory.
     * When the formatted result is {@code null} nothing is written.
     *
     * <p>This method is called from {@link PredictorRunner} tasks, which may run on
     * several threads at once. The write, line break and flush are therefore done
     * under one lock so that the records of different sequences cannot interleave.
     *
     * @param fastaSequence the sequence the prediction belongs to
     * @param i             ordinal number of the sequence, part of the result file name
     * @throws NumberFormatException declared by the original signature; presumably
     *                               raised when the result file cannot be parsed
     * @throws IOException if the result cannot be read or written
     */
    void parseResults(FastaSequence fastaSequence, int i) throws NumberFormatException, IOException {
        log.debug("Iterating other sequence: " + fastaSequence.getId());
        String formatResult = new PredictionResult(fastaSequence, new File(this.parameters.workDir, "PatternFile_" + i + "_net4000run1_predictions.res")).formatResult(this.parameters.format, this.parameters.reportAll);
        if (formatResult == null) {
            return;
        }
        // Each BufferedWriter call is atomic on its own, the three-call sequence is not:
        // without this lock another worker could write between write() and newLine().
        synchronized (this.resultWriter) {
            this.resultWriter.write(formatResult);
            this.resultWriter.newLine();
            this.resultWriter.flush();
        }
    }

    /**
     * Command line entry point.
     *
     * <p>Configures log4j (falling back to the built-in defaults when no
     * {@code log4j.properties} is found on the class path), parses and validates the
     * options, then reads the input FASTA file and submits one {@link PredictorRunner}
     * per sequence to the executor. Services and files are released in all cases once
     * the predictor has been created.
     *
     * @param strArr the command line arguments; the usage text is printed and the
     *               JVM exits when there are none or more than 8
     * @throws AssertionError if a sequence contains non-protein characters and the
     *                        clean-sequence option was not given
     */
    public static void main(String[] strArr) {
        URL systemResource = ClassLoader.getSystemResource(LogManager.DEFAULT_CONFIGURATION_FILE);
        if (systemResource == null) {
            defaultLog4jConf();
            log.info("Could not find log4j.properties in the current directory using defaults.");
        } else {
            log.info("Using log4j.properties from the path: " + systemResource.getFile());
        }
        if (strArr.length == 0 || strArr.length > 8) {
            printUsageAndQuit();
        }
        log.info("Parsing command line options...");
        NolsPredictor nolsPredictor = null;
        ExecutorService executorService = null;
        FastaReader fastaReader = null;
        Parameters parameters = new Parameters(strArr);
        try {
            parameters.validateInputExitOnFailure();
            nolsPredictor = new NolsPredictor(parameters);
            log.info("Initializing the predictor...");
            ExecutorFactory.initExecutor(parameters.threadNum, ExecutorFactory.ExecutorType.SynchroniousCallerRuns);
            executorService = ExecutorFactory.getExecutor();
            fastaReader = new FastaReader(nolsPredictor.parameters.input);
            int i = 1;
            while (fastaReader.hasNext()) {
                FastaSequence next = fastaReader.next();
                log.info("Processing sequence " + next.getId() + " (" + i + ")");
                String sequence = next.getSequence();
                if (!SequenceUtil.isProteinSequence(sequence)) {
                    if (!nolsPredictor.parameters.cleanSequence) {
                        throw new AssertionError("The input sequence is NOT a protein sequence!\nThe protein sequence must only contain the following characters: " + SequenceUtil.AA + "\nThe predictor only works with protein sequences. Please use -clean_sequence option if you want the predictor to remove non protein characters from the sequence before the prediction. Problematic sequence is:\n" + next);
                    }
                    log.info("Non protein characters detected and removed from the sequence " + next.getId());
                    next = new FastaSequence(next.getId(), SequenceUtil.cleanProteinSequence(sequence));
                }
                // PredictorRunner is an inner class, so it must be created through the
                // predictor instance; a bare "new PredictorRunner" is illegal in a static method.
                executorService.execute(nolsPredictor.new PredictorRunner(next, i));
                i++;
            }
        } catch (IOException e) {
            log.error("Exception in the main thread: " + e);
            System.err.println("The program cannot read/write from/to the input or output file!");
            e.printStackTrace();
        } finally {
            // Single cleanup point for the normal, the IOException and the unexpected-error path.
            if (nolsPredictor != null) {
                nolsPredictor.terminateAndClose(executorService, fastaReader);
            }
        }
        log.info("DONE");
    }

    /**
     * Shuts the executor down, waits up to 5 seconds for submitted tasks to finish
     * and then releases the Batchman service, the FASTA reader and the result writer.
     *
     * <p>The resources are released even when the wait is interrupted; in that case
     * the interrupt status of the calling thread is restored.
     *
     * @param executorService the executor running the prediction tasks, may be {@code null}
     * @param fastaReader     the reader of the input file, may be {@code null}
     */
    private void terminateAndClose(ExecutorService executorService, FastaReader fastaReader) {
        try {
            log.info("Terminating services...");
            if (executorService != null) {
                executorService.shutdown();
                if (!executorService.awaitTermination(5L, TimeUnit.SECONDS)) {
                    // Tasks still running at this point will find the result writer closed.
                    log.warn("Prediction tasks did not finish within 5 seconds; results of the unfinished tasks may be lost.");
                }
            }
        } catch (InterruptedException e) {
            log.error("Exception in the main thread ");
            log.error("The execution was terminated unexpectedly! ", e);
            // Keep the interrupt visible to whoever runs this thread.
            Thread.currentThread().interrupt();
        } finally {
            // Must run on every path, otherwise an interrupt would leak the Batchman
            // service, the input reader and the output writer.
            Batchman.shutdownService();
            if (fastaReader != null) {
                fastaReader.close();
            }
            FileUtil.closeSilently(log, this.resultWriter);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Prints the complete command line help (options, invocation examples, logging,
     * performance and citation notes) to standard output and then terminates the JVM
     * with exit status 1. This method never returns to the caller.
     */
    public static void printUsageAndQuit() {
        System.out.println("\r\nNucleOlar localization sequence Detector v. 1.2b (28 April 2011) \r\nhttp://www.compbio.dundee.ac.uk/nod \r\n \r\nUsage:\r\n       java -jar clinod-1.2.jar -in=inputFile <OPTIONS>\r\n\t\t\r\n       -in=<input file>       - required, absolute path to the input file. \r\n                                The input file expected to contain the list of \r\n                                FASTA formatted sequences.  \r\n\t\r\n       -out=<output file>     - optional, by default the system prints the \r\n                                output to the console. If the file exists it \r\n                                will be overridden.\r\n\t\r\n       -f=<output format>     - optional, defaults to MEDIUM. Possible values\r\n                                COMPLETE, FULL, MEDIUM, SHORT and MINIMAL (case\r\n                                sensitive)\r\n                                \r\n                                Where MINIMAL - contains the sequence name and\r\n                                the number of NOLS detected.  \r\n                                \r\n                                SHORT - same as MINIMAL plus coordinates of \r\n                                the NOLS \r\n                                \r\n                                MEDIUM - same as SHORT plus NOLS reported \r\n                                separately  \r\n                                \r\n                                FULL - same as MEDIUM plus scores for each \r\n                                position of the sequence. The scores are \r\n                                reported for every window of size 20 in the full\r\n                                sequence, one score per line. The scores are \r\n                                associate with the first residue of the window. \r\n                                Windows with a score above 0.8 are predicted as \r\n                                NoLSs.         
\r\n                                \r\n                                COMPLETE - same as FULL plus the original \r\n                                sequence                           \r\n\r\n       -d=<working directory> - optional, defaults to the system temp directory\r\n       \t\t\t\t(defined by the \"java.io.tmpdir\" environmental \r\n       \t\t\t\tvariable) If the directory does not exist, it \r\n       \t\t\t\twill be created. However the parent directory \r\n       \t\t\t\tmust exist. For example if working directory is \r\n       \t\t\t\tset to /home/tmp and home directory does not \r\n       \t\t\t\texist then the execution will be terminated. \r\n       \t\t\t\tIf the home directory exist but tmp directory \r\n       \t\t\t\tdoes not, the tmp directory will be created. \r\n       \t\t\t\tIf the /home/tmp directory exist then it will \r\n       \t\t\t\tbe used. If the /home/tmp directory is not \r\n       \t\t\t\tempty, then any files with the same name as \r\n       \t\t\t\tgenerated by the program will be overridden.    \r\n\t\r\n       -t=<number of threads> - optional, defaults to the number of cores \r\n                                available on the computer. Maximum number of \r\n                                threads cannot be greater than the number of \r\n                                available cores. If multiple threads are used \r\n                                then the order of the sequences in the output is \r\n                                not the same as in the input.  \r\n\t\t\t\t\t         \t\r\n       -bm=<batchman path>    - optional, an absolute path to the Batchman \r\n                                executable. By default this path is searched in \r\n                                the environmental variable called BATCHMAN_PATH.\r\n                                If both are provided, the path specified in the \r\n                                command line takes preference. 
One or the other \r\n                                must be provided.\r\n\r\n       -nonols                - optional, suppresses reporting of the proteins \r\n                                with no NOLS detected if specified. \r\n                                                                   \r\n       -clean_sequence        - optional, if specified causes the predictor to \r\n       \t\t\t\tautomatically remove all non-protein characters \r\n       \t\t\t\tfrom the sequences that contains them.\r\n       \t\t\t\t \r\n       \t\t\t\tThe predictor only works with unambiguous \r\n       \t\t\t\tprotein sequences. Only the following characters\r\n       \t\t\t\tconsidered to be valid: ARNDCQEGHILKMFPSTWYV. \r\n       \t\t\t\tIf any other character is detected in the \r\n       \t\t\t\tsequence the predictor stops the execution and \r\n       \t\t\t\traises the exception unless this flag is \r\n       \t\t\t\tspecified. In this case, the predictor removes \r\n       \t\t\t\tinvalid characters from the sequence and writes\r\n       \t\t\t\tthe information about it into the log file. \r\n                                \r\n       -h                     - print help (this information). If this flag is \r\n                                given all other options are ignored. \r\n       \r\nNolsPredictor requires SNNS Batch Interpreter V1.0 executable which is a part of \r\nthe Stuttgart Neural Network Simulator (SNNS) v 4.2 software suit \r\navailable free of charge from http://www.ra.cs.uni-tuebingen.de/SNNS \r\n\r\nThe path to the batchman executable can be specified either as BATCHMAN_PATH \r\nenvironmental variable or provided as a command line parameter.  
\r\n\t\r\nInvocation examples: \r\n\r\n  Example 1: \r\n\r\n       clinod-1.2.jar -in=inputFile\r\n\r\n       The predictor loads input from inputFile, prints the predictions to the \r\n       console, uses system's temporary directory to store the temporary files \r\n       and uses the number of threads equal to the number of processor \r\n       cores available on the executing machine. This example assumes that \r\n       BATCHMAN_PATH environmental variable is defined.  \r\n\t\r\n  Example 2:\r\n    \r\n       clinod-1.2.jar -in=/homes/input.fs -out=/homes/out.txt -t=1 -d=/homes/tmp\r\n       -bm=/homes/bin/batchman -nonols -f=MINIMAL\r\n    \r\n       The predictor loads the input from the /homes/input.fs, writes output to \r\n       /homes/out.txt uses a single thread to perform calculations, writes \r\n       temporary files to /homes/tmp, uses the Batchman binary from /homes/bin\r\n       directory, reports results in the MINIMAL format and only for the \r\n       sequences where at least one NOLS has been detected.   \r\n    \r\nLogging: \r\n      \r\n       NolsPredictor uses Apache log4j library for logging. With its help it is \r\n       possible to track the execution progress or peer into the details of the \r\n       execution of the program which can help to resolve issues. For this log4j \r\n       should be configured to log all events at the INFO level. 
Level DEBUG or \r\n       TRACE can help if you are experiencing problems with the predictor.\r\n       \r\n       Example log4j configuration: \r\n       \r\n       logDir = .\r\n       log4j.logger.NOD=INFO, ACTIVITY\r\n       log4j.appender.ACTIVITY=org.apache.log4j.RollingFileAppender\r\n       log4j.appender.ACTIVITY.File=${logDir}/activity.log\r\n       log4j.appender.ACTIVITY.MaxFileSize=10MB\r\n       log4j.appender.ACTIVITY.MaxBackupIndex=10000\r\n       log4j.appender.ACTIVITY.layout=org.apache.log4j.PatternLayout\r\n       log4j.appender.ACTIVITY.layout.ConversionPattern=%d{MM-dd@HH:mm:ss} %-5p %3x - %m%n\r\n       \r\n       This configuration creates a log file called activity.log in the current \r\n       working directory and records all events from the program at the level \r\n       INFO. The activity file is renamed to activity.1 once it reaches the \r\n       10 Mb size and the new activity.log file is created.  \r\n       \r\n       To configure log4j, the configuration should be written into the file \r\n       called log4j.properties and put into the same directory as the program.\r\n       By default log4j is configured to output only the error messages to the \r\n       console.  \r\n       \r\nPerformance: \r\n      \r\n      NolsPredictor writes 4 temporary files for a single FASTA sequence, \r\n      therefore for optimal performance the directory for temporary file storage \r\n      should ideally be located on the local hard drive, as opposed to the \r\n      remote storage. The predictor throughput rate is about 2 sequences per \r\n      second on the average computer.   \r\n         \r\nCitation: \r\n       \r\n      Michelle S. Scott, Peter V. Troshin and Geoffrey J. Barton - \"NoD: a \r\n      Nucleolar localization sequence detector for eukaryotic and viral \r\n      proteins\" - submitted\r\n          ");
        // Exit status 1 is used unconditionally once the usage text has been printed.
        System.exit(1);
    }

    /**
     * Installs the fallback log4j configuration used when no {@code log4j.properties}
     * is found: the {@code NOD} logger reports at level ERROR to a console appender
     * that targets standard output and uses the default pattern layout.
     */
    private static void defaultLog4jConf() {
        final Properties fallback = new Properties();
        // Logger: errors only, routed to the appender named "stdout".
        fallback.setProperty("log4j.logger.NOD", "ERROR, stdout");
        // Appender: console on System.out.
        fallback.setProperty("log4j.appender.stdout", "org.apache.log4j.ConsoleAppender");
        fallback.setProperty("log4j.appender.stdout.Target", ConsoleAppender.SYSTEM_OUT);
        // Layout: log4j's default conversion pattern.
        fallback.setProperty("log4j.appender.stdout.layout", "org.apache.log4j.PatternLayout");
        fallback.setProperty("log4j.appender.stdout.layout.ConversionPattern", PatternLayout.DEFAULT_CONVERSION_PATTERN);
        PropertyConfigurator.configure(fallback);
    }
}
