#!/usr/bin/perl
# AMPS to OC
# AMPS pairwise output parser to OC input format
# Created 30/01/03 by MajicPhatCalves (a.k.a Greg)
#
# Usage: amps2oc [infile_name] [outfile_name] [score field no.(1-5)]
#
# Reads the input file line by line, collecting:
#   * an identifier from every line that contains '>' followed by a word, and
#   * one score column (chosen by the third argument) from every line that
#     starts with whitespace followed by two integers.
# Writes to the output file, one item per line:
#   the number of identifiers, then the identifiers, then the scores.
#
# Exits with status 1 (after printing usage to STDERR) when an argument is
# missing, and dies when the score field is not 1-5 or a file cannot be
# opened or closed.

use strict;
use warnings;

system("clear");

my ($infile, $outfile, $field) = @ARGV;
my (@names, @scores);

############################# INFILE PROCESSING ###############################

unless (defined $infile && defined $outfile && defined $field) {
    warn "###### AMPS2OC ######\n";
    warn "Greg Machray 24/02/2003\n\n";
    warn "Command line not complete\n";
    warn "Correct usage is: amps2oc [infile_name] [outfile_name] [score field no.(1-5)]\n";
    warn "1=\%ID 2=NAS 3=NASAL 4=SD 5=score\n\n\n";
    exit 1;    # a usage error is a failure, so report it to the caller
}

# Reject anything outside the documented 1-5 range instead of silently
# reading an unrelated column.
unless ($field =~ m/\A[1-5]\z/) {
    die "Score field must be a whole number from 1 to 5, got '$field'\n";
}

# Map the command-line field number to an index into the split score line.
# Score lines start with whitespace, so split /\s+/ yields an empty element
# at index 0 and the first real token lands at index 1.
#   1 -> [9]  percentage identity
#   2 -> [10] NAS
#   3 -> [11] NASAL
#   4 -> [14] SD
#   5 -> [15] score
my %column_for = (1 => 9, 2 => 10, 3 => 11, 4 => 14, 5 => 15);
my $column = $column_for{$field};

open(my $in, '<', $infile)
    or die "Input file not found, or not accessible: $!\n";

while (my $line = <$in>) {
    # If line has the identifiers in it, rip them out
    if ($line =~ m/>\s*?(\w.*?)\s/) {
        push @names, $1;
    }
    # If line is part of the scores list, split it into elements and keep
    # the requested column.
    elsif ($line =~ m/^\s+\d+\s+\d+/) {
        my @parts = split /\s+/, $line;
        push @scores, $parts[$column];
    }
}

close($in);

############################# OUTFILE PROCESSING ##############################

open(my $out, '>', $outfile)
    or die "Output file can't be created (or added to): $!\n";

# Print out in the correct format: total number of sequences first, then
# every name and every score on its own line.
print {$out} scalar(@names), "\n";
print {$out} join("\n", @names), "\n";
print {$out} join("\n", @scores), "\n";

# Buffered write errors only surface at close, so check it.
close($out)
    or die "Output file could not be written completely: $!\n";

###############################################################################

exit 0;