public final class SequenceUtil
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static java.util.regex.Pattern |
AA
Valid Amino acids
|
static java.util.regex.Pattern |
AMBIGUOUS_AA
Same as AA pattern but with two additional letters - XU
|
static java.util.regex.Pattern |
AMBIGUOUS_NUCLEOTIDE
Ambiguous nucleotide
|
static java.util.regex.Pattern |
DIGIT
A digit
|
private static java.lang.String |
JRONN_WRONG_FORMAT_MESSAGE |
static java.util.regex.Pattern |
NON_AA
inversion of AA pattern
|
static java.util.regex.Pattern |
NON_NUCLEOTIDE
Non nucleotide
|
static java.util.regex.Pattern |
NONWORD
Non word
|
static java.util.regex.Pattern |
NUCLEOTIDE
Nucleotides a, t, g, c, u
|
static java.util.regex.Pattern |
WHITE_SPACE
A whitespace character: [\t\n\x0B\f\r]
|
Modifier | Constructor and Description |
---|---|
private |
SequenceUtil() |
Modifier and Type | Method and Description |
---|---|
static java.lang.String |
cleanProteinSequence(java.lang.String sequence)
Remove all non AA chars from the sequence
|
static java.lang.String |
cleanSequence(java.lang.String sequence)
Removes all whitespace chars in the sequence string
|
static void |
closeSilently(java.util.logging.Logger log,
java.io.Closeable stream)
Closes the Closeable and logs the exception, if any
|
private static float[] |
convertToNumber(java.lang.String[] annotValues) |
static java.lang.String |
deepCleanSequence(java.lang.String sequence)
Removes all special characters and digits as well as whitespace chars
from the sequence
|
static boolean |
isAmbiguosProtein(java.lang.String sequence)
Checks whether the sequence conforms to an ambiguous protein sequence
|
static boolean |
isNonAmbNucleotideSequence(java.lang.String sequence)
Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one
(!) - B char
|
static boolean |
isNucleotideSequence(FastaSequence s) |
static boolean |
isProteinSequence(java.lang.String sequence) |
static java.util.List<FastaSequence> |
openInputStream(java.lang.String inFilePath)
Reads and parses Fasta or Clustal formatted file into a list of
FastaSequence objects
|
private static java.util.TreeSet<Range> |
parseIUPredDomains(java.util.Scanner scan)
# P53_HUMA
Number of globular domains: 2
globular domain 1.
|
private static float[] |
parseIUPredScores(java.util.Scanner scan) |
private static java.util.TreeSet<Range> |
parseRanges(java.lang.Enum resultType,
java.lang.String lines)
Parsing:
# COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343,
350-391, 429-485, 497-506, 539-547
# REM465 355-368
# HOTLOOPS 190-204
|
static java.util.HashSet<Score> |
readAAConResults(java.io.InputStream results)
Read AACon result with no alignment files.
|
static java.util.HashMap<java.lang.String,java.util.Set<Score>> |
readDisembl(java.io.InputStream input)
> Foobar_dundeefriends
# COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343
# REM465 355-368
# HOTLOOPS 190-204
# RESIDUE COILS REM465 HOTLOOPS
M 0.86010 0.88512 0.37094
T 0.79983 0.85864 0.44331
>Next Sequence name
|
static java.util.List<FastaSequence> |
readFasta(java.io.InputStream inStream)
Reads fasta sequences from inStream into the list of FastaSequence
objects
|
static java.util.HashMap<java.lang.String,java.util.Set<Score>> |
readGlobPlot(java.io.InputStream input)
> Foobar_dundeefriends
# COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343
# REM465 355-368
# HOTLOOPS 190-204
# RESIDUE COILS REM465 HOTLOOPS
M 0.86010 0.88512 0.37094
T 0.79983 0.85864 0.44331
>Next Sequence name
|
static java.util.Map<java.lang.String,Score> |
readIUPred(java.io.File result)
Read IUPred output
|
private static java.util.Map<java.lang.String,Score> |
readIUPred(java.io.InputStream input,
IUPredResult type)
## Long Disorder
# P53_HUMAN
1 M 0.9943
2 E 0.9917
3 E 0.9879
(every line)
|
static java.util.Map<java.lang.String,Score> |
readJRonn(java.io.File result) |
static java.util.Map<java.lang.String,Score> |
readJRonn(java.io.InputStream inStream)
Reader for JRonn horizontal file format
|
static void |
writeClustal(java.io.OutputStream outStream,
java.util.List<FastaSequence> sequences,
char gapChar) |
static void |
writeFasta(java.io.OutputStream os,
java.util.List<FastaSequence> sequences)
Writes FastaSequence in the file, each sequence will take one line only
|
static void |
writeFasta(java.io.OutputStream outstream,
java.util.List<FastaSequence> sequences,
int width)
Writes a list of FastaSequences into the outstream, formatting the sequence
so that it contains width chars on each line
|
static void |
writeFastaKeepTheStream(java.io.OutputStream outstream,
java.util.List<FastaSequence> sequences,
int width) |
public static final java.util.regex.Pattern WHITE_SPACE
public static final java.util.regex.Pattern DIGIT
public static final java.util.regex.Pattern NONWORD
public static final java.util.regex.Pattern AA
public static final java.util.regex.Pattern NON_AA
public static final java.util.regex.Pattern AMBIGUOUS_AA
public static final java.util.regex.Pattern NUCLEOTIDE
public static final java.util.regex.Pattern AMBIGUOUS_NUCLEOTIDE
public static final java.util.regex.Pattern NON_NUCLEOTIDE
private static final java.lang.String JRONN_WRONG_FORMAT_MESSAGE
public static boolean isNucleotideSequence(FastaSequence s)
public static boolean isNonAmbNucleotideSequence(java.lang.String sequence)
public static java.lang.String cleanSequence(java.lang.String sequence)
sequence
- public static java.lang.String deepCleanSequence(java.lang.String sequence)
sequence
- public static java.lang.String cleanProteinSequence(java.lang.String sequence)
sequence
- the sequence to clean
public static boolean isProteinSequence(java.lang.String sequence)
sequence
- public static boolean isAmbiguosProtein(java.lang.String sequence)
sequence
- public static void writeFasta(java.io.OutputStream outstream, java.util.List<FastaSequence> sequences, int width) throws java.io.IOException
outstream
- sequences
- width
- the maximum number of characters to write in one line
java.io.IOException
public static void writeFastaKeepTheStream(java.io.OutputStream outstream, java.util.List<FastaSequence> sequences, int width) throws java.io.IOException
java.io.IOException
public static java.util.List<FastaSequence> readFasta(java.io.InputStream inStream) throws java.io.IOException
inStream
- the stream to read the fasta sequences from
java.io.IOException
public static void writeFasta(java.io.OutputStream os, java.util.List<FastaSequence> sequences) throws java.io.IOException
os
- sequences
- java.io.IOException
public static java.util.Map<java.lang.String,Score> readIUPred(java.io.File result) throws java.io.IOException, UnknownFileFormatException
result
- java.io.IOException
UnknownFileFormatException
private static java.util.Map<java.lang.String,Score> readIUPred(java.io.InputStream input, IUPredResult type) throws java.io.IOException, UnknownFileFormatException
java.io.IOException
UnknownFileFormatException
private static java.util.TreeSet<Range> parseIUPredDomains(java.util.Scanner scan)
scan
- private static float[] parseIUPredScores(java.util.Scanner scan) throws UnknownFileFormatException
UnknownFileFormatException
public static java.util.Map<java.lang.String,Score> readJRonn(java.io.File result) throws java.io.IOException, UnknownFileFormatException
java.io.IOException
UnknownFileFormatException
public static java.util.Map<java.lang.String,Score> readJRonn(java.io.InputStream inStream) throws java.io.IOException, UnknownFileFormatException
>Foobar
M G D T T A G
0.48 0.42 0.42 0.48 0.52 0.53 0.54
Where all values are tab delimited
inStream
- the InputStream connected to the JRonn output file
java.io.IOException
- is thrown if the inStream has problems accessing the data
UnknownFileFormatException
- is thrown if the inStream represents an unknown source of data, i.e. not a JRonn output
private static float[] convertToNumber(java.lang.String[] annotValues) throws UnknownFileFormatException
UnknownFileFormatException
public static final void closeSilently(java.util.logging.Logger log, java.io.Closeable stream)
log
- stream
- public static java.util.HashMap<java.lang.String,java.util.Set<Score>> readDisembl(java.io.InputStream input) throws java.io.IOException, UnknownFileFormatException
input
- the InputStream
java.io.IOException
UnknownFileFormatException
private static java.util.TreeSet<Range> parseRanges(java.lang.Enum resultType, java.lang.String lines)
lines
- public static java.util.HashMap<java.lang.String,java.util.Set<Score>> readGlobPlot(java.io.InputStream input) throws java.io.IOException, UnknownFileFormatException
input
- java.io.IOException
UnknownFileFormatException
public static java.util.HashSet<Score> readAAConResults(java.io.InputStream results)
results
- output file of AAConservation
ConservationMethod -> float[]
public static java.util.List<FastaSequence> openInputStream(java.lang.String inFilePath) throws java.io.IOException, UnknownFileFormatException
inFilePath
- the path to the input file
java.io.IOException
- if the file denoted by inFilePath cannot be read
UnknownFileFormatException
- if the inFilePath points to a file whose format cannot be recognised
public static void writeClustal(java.io.OutputStream outStream, java.util.List<FastaSequence> sequences, char gapChar) throws java.io.IOException
java.io.IOException