package compbio.data.sequence;

import compbio.util.Util;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/* loaded from: input_file:compbio/data/sequence/SequenceUtil.class */
/**
 * Static helpers for classifying biological sequences and for reading and
 * writing FASTA files and the text outputs of the JRonn, Disembl, GlobPlot and
 * AACon tools.
 *
 * <p>The class is not instantiable; all members are static.
 */
public final class SequenceUtil {
    /** A single whitespace character. */
    public static final Pattern WHITE_SPACE = Pattern.compile("\\s");
    /** A single decimal digit. */
    public static final Pattern DIGIT = Pattern.compile("\\d");
    /** A single non-word character (anything other than a letter, digit or underscore). */
    public static final Pattern NONWORD = Pattern.compile("\\W");
    /** One or more letters of the strict 20-letter amino acid alphabet, case-insensitive. */
    public static final Pattern AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE);
    /** One or more characters outside the strict amino acid alphabet. */
    public static final Pattern NON_AA = Pattern.compile("[^ARNDCQEGHILKMFPSTWYV]+", Pattern.CASE_INSENSITIVE);
    /** One or more letters of the amino acid alphabet extended with X and U. */
    public static final Pattern AMBIGUOUS_AA = Pattern.compile("[ARNDCQEGHILKMFPSTWYVXU]+", Pattern.CASE_INSENSITIVE);
    /** One or more of the letters A, G, T, C, U, case-insensitive. */
    public static final Pattern NUCLEOTIDE = Pattern.compile("[AGTCU]+", Pattern.CASE_INSENSITIVE);
    /** One or more letters of the nucleotide alphabet extended with ambiguity codes. */
    public static final Pattern AMBIGUOUS_NUCLEOTIDE = Pattern.compile("[AGTCRYMKSWHBVDNU]+", Pattern.CASE_INSENSITIVE);
    /** One or more characters other than A, G, T, C, U. */
    public static final Pattern NON_NUCLEOTIDE = Pattern.compile("[^AGTCU]+", Pattern.CASE_INSENSITIVE);

    /** Runs of underscores and hyphens; compiled once instead of on every call. */
    private static final Pattern UNDERSCORE_OR_HYPHEN = Pattern.compile("[_-]+");

    private static final String JRONN_WRONG_FORMAT_MESSAGE =
            "Jronn file must be in the following format:\n"
            + ">sequence_name\n"
            + " M\tV\tS\n"
            + "0.43\t0.22\t0.65\n"
            + "Where first line is the sequence name,\n"
            + "second line is the tab delimited sequence,\n"
            + "third line contains tab delimited disorder prediction values.\n"
            + "No lines are allowed between these three. Additionally, the number of "
            + " sequence residues must be equal to the number of the disorder values.";

    /** Prefix used when a JRonn record is structurally broken. */
    private static final String JRONN_NOT_RECOGNISED_MESSAGE =
            "File does not look like Jronn horizontally formatted output file!\n"
            + JRONN_WRONG_FORMAT_MESSAGE;

    private SequenceUtil() {
    }

    /**
     * Tells whether the sequence held by {@code fastaSequence} is an
     * unambiguous nucleotide sequence.
     *
     * @param fastaSequence the record whose sequence is examined
     * @return the result of {@link #isNonAmbNucleotideSequence(String)} for its sequence
     */
    public static boolean isNucleotideSequence(FastaSequence fastaSequence) {
        return isNonAmbNucleotideSequence(fastaSequence.getSequence());
    }

    /**
     * Tells whether {@code str}, after whitespace removal, consists solely of
     * the letters A, G, T, C, U (any case) and is not empty.
     *
     * @param str the sequence to test
     * @return {@code true} only for a non-empty, unambiguous nucleotide sequence
     */
    public static boolean isNonAmbNucleotideSequence(String str) {
        String cleaned = cleanSequence(str);
        if (DIGIT.matcher(cleaned).find()) {
            return false;
        }
        if (NON_NUCLEOTIDE.matcher(cleaned).find()) {
            return false;
        }
        return NUCLEOTIDE.matcher(cleaned).find();
    }

    /**
     * Removes all whitespace from {@code str} and converts it to upper case.
     *
     * @param str the sequence to clean; must not be {@code null}
     * @return the upper-cased sequence without whitespace
     */
    public static String cleanSequence(String str) {
        assert str != null;
        // Locale.ROOT keeps the result independent of the default locale
        // (e.g. under Turkish rules 'i' would upper-case to a dotted capital I).
        return WHITE_SPACE.matcher(str).replaceAll("").toUpperCase(java.util.Locale.ROOT);
    }

    /**
     * Applies {@link #cleanSequence(String)} and then removes digits, non-word
     * characters, underscores and hyphens.
     *
     * @param str the sequence to clean; must not be {@code null}
     * @return the cleaned sequence
     */
    public static String deepCleanSequence(String str) {
        String cleaned = cleanSequence(str);
        cleaned = DIGIT.matcher(cleaned).replaceAll("");
        cleaned = NONWORD.matcher(cleaned).replaceAll("");
        return UNDERSCORE_OR_HYPHEN.matcher(cleaned).replaceAll("");
    }

    /**
     * Removes every character that is not part of the strict amino acid
     * alphabet. Unlike {@link #cleanSequence(String)} the case is left as is.
     *
     * @param str the sequence to filter
     * @return {@code str} with all {@link #NON_AA} matches removed
     */
    public static String cleanProteinSequence(String str) {
        return NON_AA.matcher(str).replaceAll("");
    }

    /**
     * Tells whether {@code str}, after whitespace removal, is a non-empty
     * sequence made only of strict amino acid letters that is not also an
     * unambiguous nucleotide sequence.
     *
     * @param str the sequence to test
     * @return {@code true} if the sequence is classified as a protein
     */
    public static boolean isProteinSequence(String str) {
        String cleaned = cleanSequence(str);
        if (isNonAmbNucleotideSequence(cleaned)) {
            return false;
        }
        if (DIGIT.matcher(cleaned).find() || NON_AA.matcher(cleaned).find()) {
            return false;
        }
        return AA.matcher(cleaned).find();
    }

    /**
     * Tells whether {@code str}, after whitespace removal, is a protein
     * sequence that contains at least one ambiguous residue.
     *
     * <p>The sequence qualifies when it is not an unambiguous nucleotide
     * sequence, every character belongs to {@link #AMBIGUOUS_AA}, and at least
     * one character lies outside the strict {@link #AA} alphabet (that is, an
     * X or a U).
     *
     * <p>The previous implementation rejected any sequence with a
     * {@link #NON_AA} match (which includes X and U) and then any sequence
     * with an {@link #AA} match, so it could never return {@code true}.
     *
     * @param str the sequence to test
     * @return {@code true} if the sequence is an ambiguous protein sequence
     */
    public static boolean isAmbiguosProtein(String str) {
        String cleaned = cleanSequence(str);
        if (isNonAmbNucleotideSequence(cleaned) || DIGIT.matcher(cleaned).find()) {
            return false;
        }
        // Whole sequence must stay inside the extended alphabet (also rejects "").
        if (!AMBIGUOUS_AA.matcher(cleaned).matches()) {
            return false;
        }
        // At least one residue must be outside the strict alphabet.
        return NON_AA.matcher(cleaned).find();
    }

    /**
     * Writes the sequences in FASTA format and closes {@code outputStream},
     * also when writing fails.
     *
     * @param outputStream the destination; closed on return
     * @param list the sequences to write
     * @param i passed to {@code FastaSequence.getFormatedSequence(int)} for each sequence
     * @throws IOException if writing or closing fails
     */
    public static void writeFasta(OutputStream outputStream, List<FastaSequence> list, int i) throws IOException {
        try (OutputStream closing = outputStream) {
            writeFastaKeepTheStream(closing, list, i);
        }
    }

    /**
     * Writes the sequences in FASTA format, flushes, and leaves
     * {@code outputStream} open.
     *
     * @param outputStream the destination; not closed
     * @param list the sequences to write
     * @param i passed to {@code FastaSequence.getFormatedSequence(int)} for each sequence
     * @throws IOException if writing fails
     */
    public static void writeFastaKeepTheStream(OutputStream outputStream, List<FastaSequence> list, int i) throws IOException {
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputStream));
        for (FastaSequence sequence : list) {
            writer.write(">" + sequence.getId() + "\n");
            writer.write(sequence.getFormatedSequence(i));
            writer.write("\n");
        }
        // Flushing the buffered writer also flushes the wrapped stream writer.
        writer.flush();
    }

    /**
     * Reads FASTA records from {@code inputStream} and closes it.
     *
     * <p>A record starts at a line beginning with {@code >}; the rest of that
     * line is the identifier. All following lines up to the next header are
     * joined, with whitespace removed, into the sequence. Lines that appear
     * before the first header are ignored.
     *
     * @param inputStream UTF-8 encoded FASTA data; closed on return
     * @return the records in file order; empty if no header line was found
     * @throws IOException if reading fails
     */
    public static List<FastaSequence> readFasta(InputStream inputStream) throws IOException {
        List<FastaSequence> sequences = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF8"), 16000)) {
            String header = null;
            StringBuilder residues = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(">")) {
                    if (header != null) {
                        sequences.add(new FastaSequence(header.substring(1), residues.toString()));
                    }
                    header = line;
                    residues.setLength(0);
                } else if (header != null) {
                    residues.append(WHITE_SPACE.matcher(line).replaceAll(""));
                }
            }
            if (header != null) {
                sequences.add(new FastaSequence(header.substring(1), residues.toString()));
            }
        }
        return sequences;
    }

    /**
     * Writes each sequence using {@code FastaSequence.getOnelineFasta()} and
     * closes {@code outputStream}, also when writing fails.
     *
     * @param outputStream the destination; closed on return
     * @param list the sequences to write
     * @throws IOException if writing or closing fails
     */
    public static void writeFasta(OutputStream outputStream, List<FastaSequence> list) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputStream))) {
            for (FastaSequence sequence : list) {
                writer.write(sequence.getOnelineFasta());
            }
        }
    }

    /**
     * Reads JRonn results from {@code file}.
     *
     * @param file the JRonn output file
     * @return sequence name to score, as produced by {@link #readJRonn(InputStream)}
     * @throws IOException if the file cannot be opened or read
     * @throws UnknownFileFormatException if the content is not valid JRonn output
     */
    public static Map<String, Score> readJRonn(File file) throws IOException, UnknownFileFormatException {
        try (FileInputStream input = new FileInputStream(file)) {
            return readJRonn(input);
        }
    }

    /**
     * Reads JRonn results from {@code inputStream} and closes it.
     *
     * <p>Each record is three consecutive lines: {@code >name}, the
     * tab-delimited sequence, and the tab-delimited disorder values. Lines
     * that do not start a record are skipped.
     *
     * @param inputStream UTF-8 encoded JRonn output; closed on return
     * @return sequence name to score
     * @throws IOException if reading fails
     * @throws UnknownFileFormatException if a record is truncated, a value is
     *         not a number, or the number of values differs from the sequence length
     */
    public static Map<String, Score> readJRonn(InputStream inputStream) throws IOException, UnknownFileFormatException {
        Map<String, Score> scores = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF8"), 16000)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.startsWith(">")) {
                    continue;
                }
                String name = line.trim().substring(1);
                String sequenceLine = reader.readLine();
                String valueLine = reader.readLine();
                if (sequenceLine == null || valueLine == null) {
                    // Record cut short by end of input.
                    throw new UnknownFileFormatException(JRONN_NOT_RECOGNISED_MESSAGE);
                }
                String sequence = sequenceLine.replace("\t", "");
                float[] values = convertToNumber(valueLine.split("\t"));
                if (values.length != sequence.length()) {
                    throw new UnknownFileFormatException(JRONN_NOT_RECOGNISED_MESSAGE);
                }
                scores.put(name, new Score(DisorderMethod.JRonn, values));
            }
        }
        return scores;
    }

    /**
     * Parses every token as a float.
     *
     * @param tokens the textual values
     * @return the parsed values, in the same order
     * @throws UnknownFileFormatException if a token is not a valid float; the
     *         {@link NumberFormatException} is attached as the cause
     */
    private static float[] convertToNumber(String[] tokens) throws UnknownFileFormatException {
        float[] values = new float[tokens.length];
        for (int idx = 0; idx < values.length; idx++) {
            try {
                values[idx] = Float.parseFloat(tokens[idx]);
            } catch (NumberFormatException e) {
                throw new UnknownFileFormatException(JRONN_WRONG_FORMAT_MESSAGE, e);
            }
        }
        return values;
    }

    /**
     * Closes {@code closeable}, logging rather than propagating any
     * {@link IOException}.
     *
     * @param logger receives a WARNING record if closing fails
     * @param closeable the resource to close; {@code null} is ignored
     */
    public static final void closeSilently(Logger logger, Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (IOException e) {
            // Log the exception itself so the stack trace is not lost.
            logger.log(Level.WARNING, e.getLocalizedMessage(), e);
        }
    }

    /**
     * Reads Disembl results from {@code inputStream} and closes it.
     *
     * <p>Records are separated by {@code >}. Each record consists of the
     * sequence name, three range lines (COILS, REM465, HOTLOOPS, in that
     * order), a column title line, and then one row per residue holding the
     * residue followed by the COILS, REM465 and HOTLOOPS values.
     *
     * @param inputStream the Disembl output; closed on return
     * @return sequence name to its three scores
     * @throws IOException declared for callers; see {@link Scanner} for how read errors surface
     * @throws UnknownFileFormatException if the input contains no record at all
     */
    public static HashMap<String, Set<Score>> readDisembl(InputStream inputStream) throws IOException, UnknownFileFormatException {
        // Closing the scanner also closes the underlying stream.
        try (Scanner records = new Scanner(inputStream)) {
            records.useDelimiter(">");
            if (!records.hasNext()) {
                throw new UnknownFileFormatException("In Disembl score format each sequence score is expected to start from the line: >Sequence name  No such line was found!");
            }
            HashMap<String, Set<Score>> results = new HashMap<>();
            while (records.hasNext()) {
                try (Scanner record = new Scanner(records.next())) {
                    // Values use '.' as decimal separator regardless of the default locale.
                    record.useLocale(java.util.Locale.ROOT);
                    if (!record.hasNextLine()) {
                        throw new RuntimeException("The input looks like an incomplete disembl file - cannot parse!");
                    }
                    String name = record.nextLine().trim();
                    TreeSet<Range> coilsRanges = parseRanges(DisemblResult.COILS, record.nextLine());
                    TreeSet<Range> rem465Ranges = parseRanges(DisemblResult.REM465, record.nextLine());
                    TreeSet<Range> hotloopsRanges = parseRanges(DisemblResult.HOTLOOPS, record.nextLine());
                    String title = record.nextLine();
                    assert title.startsWith("# RESIDUE COILS REM465 HOTLOOPS")
                            : ">Sequence_name must follow column title: # RESIDUE COILS REM465 HOTLOOPS!";

                    ArrayList<Float> coils = new ArrayList<>();
                    ArrayList<Float> rem465 = new ArrayList<>();
                    ArrayList<Float> hotloops = new ArrayList<>();
                    while (record.hasNext()) {
                        record.next(); // residue column, not retained
                        coils.add(record.nextFloat());
                        rem465.add(record.nextFloat());
                        hotloops.add(record.nextFloat());
                    }

                    Set<Score> scores = new HashSet<>();
                    scores.add(new Score(DisemblResult.COILS, coils, coilsRanges));
                    // Each score is paired with the ranges parsed for the same
                    // result type (these two were previously crossed over).
                    scores.add(new Score(DisemblResult.HOTLOOPS, hotloops, hotloopsRanges));
                    scores.add(new Score(DisemblResult.REM465, rem465, rem465Ranges));
                    results.put(name, scores);
                }
            }
            return results;
        }
    }

    /**
     * Parses one range line of the form {@code # LABEL from-to, from-to, ...}.
     *
     * @param resultType the expected label (compared case-insensitively, assertion only)
     * @param line the line to parse
     * @return the parsed ranges; empty if none are listed
     */
    private static TreeSet<Range> parseRanges(Enum<?> resultType, String line) {
        TreeSet<Range> ranges = new TreeSet<>();
        try (Scanner tokens = new Scanner(line)) {
            assert tokens.hasNext();
            String marker = tokens.next();
            assert "#".equals(marker);
            String label = tokens.next();
            assert resultType.toString().equalsIgnoreCase(label) : "Unknown result type: " + resultType.toString();
            // The remainder of the line is a comma separated list of ranges.
            tokens.useDelimiter(",");
            while (tokens.hasNext()) {
                String range = tokens.next();
                if (!Util.isEmpty(range)) {
                    ranges.add(new Range(range.split("-")));
                }
            }
        }
        return ranges;
    }

    /**
     * Reads GlobPlot results from {@code inputStream} and closes it.
     *
     * <p>Records are separated by {@code >}. Each record consists of the
     * sequence name, two range lines (GlobDoms, Disorder, in that order), a
     * column title line, and then one row per residue holding the residue
     * followed by the dydx, raw and smoothed values.
     *
     * @param inputStream the GlobPlot output; closed on return
     * @return sequence name to its five scores
     * @throws IOException declared for callers; see {@link Scanner} for how read errors surface
     * @throws UnknownFileFormatException if the input contains no record at all
     */
    public static HashMap<String, Set<Score>> readGlobPlot(InputStream inputStream) throws IOException, UnknownFileFormatException {
        // Closing the scanner also closes the underlying stream.
        try (Scanner records = new Scanner(inputStream)) {
            records.useDelimiter(">");
            if (!records.hasNext()) {
                throw new UnknownFileFormatException("In GlobPlot score format each sequence score is expected to start from the line: >Sequence name  No such line was found!");
            }
            HashMap<String, Set<Score>> results = new HashMap<>();
            while (records.hasNext()) {
                try (Scanner record = new Scanner(records.next())) {
                    // Values use '.' as decimal separator regardless of the default locale.
                    record.useLocale(java.util.Locale.ROOT);
                    if (!record.hasNextLine()) {
                        throw new RuntimeException("The input looks like an incomplete GlobPlot file - cannot parse!");
                    }
                    String name = record.nextLine().trim();
                    TreeSet<Range> globDomsRanges = parseRanges(GlobProtResult.GlobDoms, record.nextLine());
                    TreeSet<Range> disorderRanges = parseRanges(GlobProtResult.Disorder, record.nextLine());
                    String title = record.nextLine();
                    assert title.startsWith("# RESIDUE\tDYDX")
                            : ">Sequence_name must follow column title: # RESIDUE DYDX RAW SMOOTHED!";

                    ArrayList<Float> dydx = new ArrayList<>();
                    ArrayList<Float> raw = new ArrayList<>();
                    ArrayList<Float> smoothed = new ArrayList<>();
                    while (record.hasNext()) {
                        record.next(); // residue column, not retained
                        dydx.add(record.nextFloat());
                        raw.add(record.nextFloat());
                        smoothed.add(record.nextFloat());
                    }

                    Set<Score> scores = new HashSet<>();
                    scores.add(new Score(GlobProtResult.Disorder, disorderRanges));
                    scores.add(new Score(GlobProtResult.GlobDoms, globDomsRanges));
                    scores.add(new Score(GlobProtResult.Dydx, dydx));
                    scores.add(new Score(GlobProtResult.RawScore, raw));
                    scores.add(new Score(GlobProtResult.SmoothedScore, smoothed));
                    results.put(name, scores);
                }
            }
            return results;
        }
    }

    /**
     * Reads AACon results from {@code inputStream}.
     *
     * <p>Entries are separated by {@code #}. Each entry is a method name, a
     * space, and then the numeric values for that method. The stream is not
     * closed by this method.
     *
     * @param inputStream the AACon output; must not be {@code null}
     * @return one score per entry
     * @throws NullPointerException if {@code inputStream} is {@code null}
     */
    public static HashSet<Score> readAAConResults(InputStream inputStream) {
        if (inputStream == null) {
            throw new NullPointerException("InputStream with results must be provided");
        }
        HashSet<Score> results = new HashSet<>();
        Scanner entries = new Scanner(inputStream);
        entries.useDelimiter("#");
        while (entries.hasNext()) {
            String entry = entries.next();
            int space = entry.indexOf(" ");
            assert space > 0 : "Space is expected as delimited between method name and values!";
            String methodName = entry.substring(0, space);
            ConservationMethod method = ConservationMethod.getMethod(methodName);
            assert method != null : "Method " + methodName + " is not recognized! ";

            ArrayList<Float> values = new ArrayList<>();
            try (Scanner numbers = new Scanner(entry.substring(space))) {
                // Values use '.' as decimal separator regardless of the default locale.
                numbers.useLocale(java.util.Locale.ROOT);
                while (numbers.hasNextDouble()) {
                    values.add((float) numbers.nextDouble());
                }
            }
            results.add(new Score(method, values));
        }
        return results;
    }

    /**
     * Reads sequences from the file at path {@code str}, which may be either a
     * Clustal alignment or a FASTA file.
     *
     * <p>The file is opened twice: once to detect the format and once to read
     * the data. Both streams are closed before returning.
     *
     * @param str path of the file to read
     * @return the sequences found in the file
     * @throws IOException if the file cannot be opened or read
     * @throws UnknownFileFormatException if the Clustal reader rejects the content
     */
    public static List<FastaSequence> openInputStream(String str) throws IOException, UnknownFileFormatException {
        boolean clustal;
        try (FileInputStream probe = new FileInputStream(str)) {
            clustal = ClustalAlignmentUtil.isValidClustalFile(probe);
        }
        try (FileInputStream data = new FileInputStream(str)) {
            return clustal ? ClustalAlignmentUtil.readClustalFile(data).getSequences() : readFasta(data);
        }
    }
}
