|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
  compbio.data.sequence.SequenceUtil
public final class SequenceUtil
Utility class for operations on sequences
| Field Summary | |
|---|---|
static java.util.regex.Pattern |
AA
Valid Amino acids |
static java.util.regex.Pattern |
AMBIGUOUS_AA
Same as AA pattern but with two additional letters - XU |
static java.util.regex.Pattern |
AMBIGUOUS_NUCLEOTIDE
Ambiguous nucleotide |
static java.util.regex.Pattern |
DIGIT
A digit |
static java.util.regex.Pattern |
NON_AA
inversion of AA pattern |
static java.util.regex.Pattern |
NON_NUCLEOTIDE
Non nucleotide |
static java.util.regex.Pattern |
NONWORD
Non word |
static java.util.regex.Pattern |
NUCLEOTIDE
Nucleotides a, t, g, c, u |
static java.util.regex.Pattern |
WHITE_SPACE
A whitespace character: [\t\n\x0B\f\r] |
| Method Summary | |
|---|---|
static java.lang.String |
cleanProteinSequence(java.lang.String sequence)
Remove all non AA chars from the sequence |
static java.lang.String |
cleanSequence(java.lang.String sequence)
Removes all whitespace chars in the sequence string |
static void |
closeSilently(java.util.logging.Logger log,
java.io.Closeable stream)
Closes the Closeable and logs the exception if any |
static java.lang.String |
deepCleanSequence(java.lang.String sequence)
Removes all special characters and digits as well as whitespace chars from the sequence |
static boolean |
isAmbiguosProtein(java.lang.String sequence)
Checks whether the sequence conforms to an ambiguous protein sequence |
static boolean |
isNonAmbNucleotideSequence(java.lang.String sequence)
Ambiguous DNA chars : AGTCRYMKSWHBVDN // differs from protein in only one (!) - B char |
static boolean |
isNucleotideSequence(FastaSequence s)
|
static boolean |
isProteinSequence(java.lang.String sequence)
|
static java.util.List<FastaSequence> |
openInputStream(java.lang.String inFilePath)
Reads and parses Fasta or Clustal formatted file into a list of FastaSequence objects |
static java.util.HashSet<Score> |
readAAConResults(java.io.InputStream results)
Read AACon result with no alignment files. |
static java.util.HashMap<java.lang.String,java.util.Set<Score>> |
readDisembl(java.io.InputStream input)
> Foobar_dundeefriends # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 # REM465 355-368 # HOTLOOPS 190-204 # RESIDUE COILS REM465 HOTLOOPS M 0.86010 0.88512 0.37094 T 0.79983 0.85864 0.44331 >Next Sequence name |
static java.util.List<FastaSequence> |
readFasta(java.io.InputStream inStream)
Reads fasta sequences from inStream into the list of FastaSequence objects |
static java.util.HashMap<java.lang.String,java.util.Set<Score>> |
readGlobPlot(java.io.InputStream input)
> Foobar_dundeefriends # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343 # REM465 355-368 # HOTLOOPS 190-204 # RESIDUE COILS REM465 HOTLOOPS M 0.86010 0.88512 0.37094 T 0.79983 0.85864 0.44331 >Next Sequence name |
static java.util.Map<java.lang.String,Score> |
readIUPred(java.io.File result)
Read IUPred output |
static java.util.Map<java.lang.String,Score> |
readJRonn(java.io.File result)
|
static java.util.Map<java.lang.String,Score> |
readJRonn(java.io.InputStream inStream)
Reader for JRonn horizontal file format |
static void |
writeFasta(java.io.OutputStream os,
java.util.List<FastaSequence> sequences)
Writes FastaSequence in the file, each sequence will take one line only |
static void |
writeFasta(java.io.OutputStream outstream,
java.util.List<FastaSequence> sequences,
int width)
Writes a list of FastaSequences into the outstream, formatting each sequence so that it contains width chars on each line |
static void |
writeFastaKeepTheStream(java.io.OutputStream outstream,
java.util.List<FastaSequence> sequences,
int width)
|
| Methods inherited from class java.lang.Object |
|---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
public static final java.util.regex.Pattern WHITE_SPACE
public static final java.util.regex.Pattern DIGIT
public static final java.util.regex.Pattern NONWORD
public static final java.util.regex.Pattern AA
public static final java.util.regex.Pattern NON_AA
public static final java.util.regex.Pattern AMBIGUOUS_AA
public static final java.util.regex.Pattern NUCLEOTIDE
public static final java.util.regex.Pattern AMBIGUOUS_NUCLEOTIDE
public static final java.util.regex.Pattern NON_NUCLEOTIDE
| Method Detail |
|---|
public static boolean isNucleotideSequence(FastaSequence s)
public static boolean isNonAmbNucleotideSequence(java.lang.String sequence)
public static java.lang.String cleanSequence(java.lang.String sequence)
sequence -
public static java.lang.String deepCleanSequence(java.lang.String sequence)
sequence -
public static java.lang.String cleanProteinSequence(java.lang.String sequence)
sequence - the sequence to clean
public static boolean isProteinSequence(java.lang.String sequence)
sequence -
public static boolean isAmbiguosProtein(java.lang.String sequence)
sequence -
public static void writeFasta(java.io.OutputStream outstream,
java.util.List<FastaSequence> sequences,
int width)
throws java.io.IOException
outstream - sequences - width - the maximum number of characters to write in one line
java.io.IOException
public static void writeFastaKeepTheStream(java.io.OutputStream outstream,
java.util.List<FastaSequence> sequences,
int width)
throws java.io.IOException
java.io.IOException
public static java.util.List<FastaSequence> readFasta(java.io.InputStream inStream)
throws java.io.IOException
inStream - from
java.io.IOException
public static void writeFasta(java.io.OutputStream os,
java.util.List<FastaSequence> sequences)
throws java.io.IOException
os - sequences -
java.io.IOException
public static java.util.Map<java.lang.String,Score> readIUPred(java.io.File result)
throws java.io.IOException,
UnknownFileFormatException
result -
java.io.IOException
UnknownFileFormatException
public static java.util.Map<java.lang.String,Score> readJRonn(java.io.File result)
throws java.io.IOException,
UnknownFileFormatException
java.io.IOException
UnknownFileFormatException
public static java.util.Map<java.lang.String,Score> readJRonn(java.io.InputStream inStream)
throws java.io.IOException,
UnknownFileFormatException
>Foobar M G D T T A G 0.48 0.42 0.42 0.48 0.52 0.53 0.54 Where all values are tab delimited.
inStream - the InputStream connected to the JRonn output file
java.io.IOException - is thrown if the inStream has problems accessing the data
UnknownFileFormatException - is thrown if the inStream represents an unknown source of
data, i.e. not a JRonn output
public static final void closeSilently(java.util.logging.Logger log,
java.io.Closeable stream)
log - stream -
public static java.util.HashMap<java.lang.String,java.util.Set<Score>> readDisembl(java.io.InputStream input)
throws java.io.IOException,
UnknownFileFormatException
input - the InputStream
java.io.IOException
UnknownFileFormatException
public static java.util.HashMap<java.lang.String,java.util.Set<Score>> readGlobPlot(java.io.InputStream input)
throws java.io.IOException,
UnknownFileFormatException
input -
java.io.IOException
UnknownFileFormatException
public static java.util.HashSet<Score> readAAConResults(java.io.InputStream results)
results - output file of AAConservation
ConservationMethod -> float[]
public static java.util.List<FastaSequence> openInputStream(java.lang.String inFilePath)
throws java.io.IOException,
UnknownFileFormatException
inFilePath - the path to the input file
java.io.IOException - if the file denoted by inFilePath cannot be read
UnknownFileFormatException - if inFilePath points to a file whose format cannot be
recognised
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||