/***************************************************************************

clus2blc:  A program to convert a CLUSTAL PIR format alignment file to an 
                                                        AMPS blockfile.

   Copyright:  University of Oxford (1992)

TERMS OF USE:

The computer software and associated documentation called ALSCRIPT hereinafter
referred to as the WORK is more particularly identified and described in 
Appendix A.

The WORK was written and developed by: Geoffrey J. Barton

Laboratory of Molecular Biophysics
University of Oxford
Rex Richards Building
South Parks Road
Oxford OX1 3QU
U.K.

Tel:  (+44) 865-275368
Fax:  (+44) 865-510454

Internet: gjb@bioch.ox.ac.uk
Janet:    gjb@uk.ac.ox.bioch

The WORK is Copyright (1992) University of Oxford

Administrative Offices
Wellington Square
Oxford OX1 2JD
U.K.

CONDITIONS:

The WORK is made available for educational and non-commercial research 
purposes.

For commercial use, a commercial licence is required - contact the author
at the above address for details.

The WORK may be modified, however this text, all copyright notices and
the authors' address must be left unchanged on every copy, and all
changes must be documented after this notice.  A copy of the
modified WORK must be supplied to the author.

All use of the WORK must cite:  Barton, G. J. (1993), ALSCRIPT: A Tool to
Format Multiple Sequence Alignments, Protein Engineering, Volume 6, No. 1, 
pp. 37-40.

APPENDIX A:

The program package known as ALSCRIPT is made up of the following files:

README     	This copyright notice
EXAMPLE.BLC     Example block file
EXAMPLE.COM     Example command file
EXAMPLE1.BLC    Example block file
EXAMPLE1.COM    Example command file
EXAMPLE2.COM    Example command file
EXAMPLE3.COM    Example command file
ALSCRIPT.EXE    ALSCRIPT executable program
MSF2BLC.EXE     GCG MSF to BLOCK file conversion program
CLUS2BLC.EXE    CLUSTAL PIR file to BLOCK file conversion program
EXAMPLE1.PS     Example output of program
ALSCRIPT.DOC    Documentation
MAKEFILE        Makefile for programs
MAKEFILE.GCC    Makefile for GCC compiler
MAKEFILE.SGI    Makefile for Silicon Graphics 
MAKEFILE.WAT    Makefile for WATCOM 386 C compiler
AGETBLOC.C      Source code for block file reading routine
ALPS.C          Source code for main ALSCRIPT routines
ARRAY.H         Header file
CLUS2BLC.C      Source code for CLUSTAL PIR file to BLOCK file conversion program
DEFAULTS.H      Header file
GJUTIL.C        Source code for utility routines
GJUTIL.H        pHeader file for utility routines
MSF2BLC.C       Source code for GCG MSF to BLOCK file conversion program
MAKEFILE.SUN    Makefile for Sun acc compiler.
ALSCRIPT.C      Source code for ALSCRIPT main program


****************************************************************************

Notes:  This program can be run as a pipe:  type clus2blc -q < input > output
Only error messages will be output to std_err

Default mode is interactive and prompts for filenames.

The storage for the sequences is allocated dynamically, so the MAX_SEQ_LEN
defines in the header file "defaults.h" have no effect.  If a system memory
limit is reached, then a "malloc error" message will be written and the
program will stop.  Most computers should happily cope with large numbers of
long sequences.  If yours doesn't, some  possible solutions are outlined in
the user manual - alscript.doc.

****************************************************************************/

#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "gjutil.h"
#include "array.h"
#include "defaults.h"

#define TOKENS " \t\n"


/*
 * Changes to the original clus2blc main routine (recorded here because the
 * terms of use above ask for modifications to be documented after the notice):
 *
 *   - main() is declared as returning int and returns an exit status.
 *   - An unrecognised command-line argument now prints a usage message and
 *     exits; previously the input/output streams were left unset.
 *   - Filenames typed at the prompts are read with a bounded field width and
 *     a missing filename is reported.
 *   - Characters from fgetc() are held in an int so EOF is detected reliably.
 *   - The sequence buffer that was allocated and immediately overwritten
 *     (a leak per sequence) is no longer allocated.
 *   - An identifier line with no following title line yields an entry with
 *     an empty title and an empty sequence instead of uninitialised fields.
 *   - Over-long identifier/title lines are skipped to end of line so that
 *     their tail is not parsed as sequence data.
 *   - The residue buffer grows geometrically instead of one byte at a time.
 *   - Output is checked for write errors and opened files are closed.
 */

/* Discard the rest of the current input line when buf, as filled by
   fgets(), does not contain the terminating newline. */
static void discard_line_tail(FILE *fp, const char *buf)
{
    int c;

    if (strchr(buf, '\n') != NULL) {
        return;
    }
    while ((c = fgetc(fp)) != EOF && c != '\n') {
        ;   /* skip */
    }
}

/* Read one whitespace-delimited word of at most buflen-1 characters from in
   into buf.  Returns 1 on success, 0 if nothing could be read.
   buflen must be at least 2. */
static int read_filename(FILE *in, char *buf, int buflen)
{
    char fmt[32];

    /* Build "%<width>s" so that fscanf cannot write past the buffer. */
    sprintf(fmt, "%%%ds", buflen - 1);
    return fscanf(in, fmt, buf) == 1;
}

/* Read residues from fp up to the terminating '*' (or end of file), keeping
   only letters, '-' and '.'.  Returns a buffer obtained from GJmalloc /
   GJrealloc that is NOT NUL-terminated; the number of residues stored is
   written to *nres.  The caller releases the buffer with GJfree. */
static char *read_residues(FILE *fp, int *nres)
{
    size_t cap = 256;
    size_t len = 0;
    char *buf = (char *) GJmalloc(sizeof(char) * cap);
    int c;      /* int, not char: must be able to hold EOF */

    while ((c = fgetc(fp)) != EOF && c != '*') {
        if (isalpha(c) || c == '-' || c == '.') {
            if (len == cap) {
                cap *= 2;
                buf = (char *) GJrealloc(buf, sizeof(char) * cap);
            }
            buf[len++] = (char) c;
        }
    }
    *nres = (int) len;
    return buf;
}

/*
 * clus2blc entry point.
 *
 * With "-q" as the first argument the CLUSTAL NBRF-PIR alignment is read
 * from standard input and the AMPS block file is written to standard
 * output, with no progress messages.  With no arguments the program
 * prompts for the input and output filenames.  Any other first argument
 * is rejected with a usage message.
 *
 * Each PIR entry is expected as: a line starting with '>' whose first
 * token is the identifier, one title line, then sequence characters
 * terminated by '*'.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on a usage error, a missing
 * filename, or an output error.
 */
int main(int argc, char *argv[])
{
    extern FILE *std_err, *std_in, *std_out;
    struct seqdat *seqs;
    FILE *fp, *fout;
    char *line;
    char *pirfile;
    char *blocfile;
    char *token;
    char *nl;
    int nseq = 0;
    int allen = 0;      /* total alignment length (longest sequence) */
    int quiet = 0;
    int status = EXIT_SUCCESS;
    int i, j;

    std_err = stderr;
    std_in = stdin;
    std_out = stdout;

    /* Reject anything other than "-q" up front; there is no stream to
       read from or write to in that case. */
    if (argc > 1 && strcmp(argv[1], "-q") != 0) {
        fprintf(std_err, "clus2blc: unrecognised option '%s'\n", argv[1]);
        fprintf(std_err, "Usage: clus2blc [-q]\n");
        return EXIT_FAILURE;
    }

    line = GJstrcreate(MAX_INLEN, " ");
    pirfile = GJstrcreate(MAX_INLEN, NULL);
    blocfile = GJstrcreate(MAX_INLEN, NULL);

    if (argc > 1) {
        /* Quiet mode - read PIR file from stdin and output block file to stdout */
        quiet = 1;
        fp = std_in;
        fout = std_out;
    } else {
        /* Verbose mode - prompt for all filenames */
        fprintf(std_out, "\n\n");
        fprintf(std_out, "CLUSTAL NBRF-PIR format to AMPS Blockfile conversion\n");
        fprintf(std_out, "Copyright: University of Oxford (1992)\n");
        fprintf(std_out, "Author: G. J. Barton (1992)\n\n");
        fprintf(std_out, "Max number/length of alignment - Defined by System\n");
        fprintf(std_out, "If you get a malloc error message - see manual\n\n");
        fprintf(std_out, "Enter CLUSTAL NBRF-PIR alignment filename: ");

        /* The buffers are treated as MAX_INLEN bytes long, as the
           fgets(line, MAX_INLEN, ...) calls below already do. */
        if (!read_filename(std_in, pirfile, MAX_INLEN)) {
            fprintf(std_err, "clus2blc: no alignment filename given\n");
            GJfree(line);
            GJfree(blocfile);
            GJfree(pirfile);
            return EXIT_FAILURE;
        }
        fprintf(std_out, "Opening: %s\n", pirfile);
        fp = GJfopen(pirfile, "r", 1);

        fprintf(std_out, "Enter Block filename: ");
        if (!read_filename(std_in, blocfile, MAX_INLEN)) {
            fprintf(std_err, "clus2blc: no block filename given\n");
            fclose(fp);
            GJfree(line);
            GJfree(blocfile);
            GJfree(pirfile);
            return EXIT_FAILURE;
        }
        fprintf(std_out, "Opening: %s\n", blocfile);
        fout = GJfopen(blocfile, "w", 1);
    }

    fprintf(fout, "\n");
    fprintf(fout, "Conversion of CLUSTAL NBRF-PIR file to AMPS BLOCKFILE format\n");
    fprintf(fout, "clus2blc:  Geoffrey J. Barton (1992)\n\n");

    seqs = (struct seqdat *) GJmalloc(sizeof(struct seqdat));

    if (!quiet) fprintf(std_out, "Reading .pir file\n");

    while (fgets(line, MAX_INLEN, fp) != NULL) {
        if (line[0] != '>') {
            continue;
        }

        /* found an identifier; must look for the newline before strtok
           overwrites it */
        discard_line_tail(fp, line);
        token = strtok(&line[1], " \n");
        if (token == NULL) {
            continue;
        }

        seqs = (struct seqdat *) GJrealloc(seqs, sizeof(struct seqdat) * (nseq + 1));
        seqs[nseq].id = GJstrdup(token);

        if (fgets(line, MAX_INLEN, fp) != NULL) {
            /* read the title line; it is stored without its newline,
               which is added back when the header is written */
            discard_line_tail(fp, line);
            nl = strchr(line, '\n');
            if (nl != NULL) {
                *nl = '\0';
            }
            seqs[nseq].title = GJstrdup(line);
            seqs[nseq].seq = read_residues(fp, &i);
        } else {
            /* identifier was the last line of the file: keep the entry
               but give it well-defined, empty contents */
            seqs[nseq].title = GJstrdup("");
            seqs[nseq].seq = (char *) GJmalloc(sizeof(char));
            i = 0;
        }

        seqs[nseq].slen = i;
        if (i > allen) {
            allen = i;
        }
        ++nseq;
    }

    if (!quiet) fprintf(std_out, "All %d sequences read in\n", nseq);
    if (!quiet) fprintf(std_out, "Writing .blc file\n");

    for (i = 0; i < nseq; ++i) {
        fprintf(fout, ">%s %s\n", seqs[i].id, seqs[i].title);
    }
    fprintf(fout, "* iteration 1\n");

    /* One output row per alignment column; sequences shorter than the
       longest one are padded with spaces. */
    for (i = 0; i < allen; ++i) {
        for (j = 0; j < nseq; ++j) {
            if (seqs[j].slen <= i) {
                fputc(' ', fout);
            } else {
                fputc(seqs[j].seq[i], fout);
            }
        }
        fputc('\n', fout);
    }
    fprintf(fout, "*\n");

    if (fflush(fout) != 0 || ferror(fout)) {
        fprintf(std_err, "clus2blc: error writing block file\n");
        status = EXIT_FAILURE;
    }

    if (!quiet) fprintf(std_out, "All done\n");

    for (i = 0; i < nseq; ++i) {
        GJfree(seqs[i].seq);
        GJfree(seqs[i].id);
        GJfree(seqs[i].title);
    }
    GJfree(seqs);
    GJfree(line);
    GJfree(blocfile);
    GJfree(pirfile);

    /* Only close streams this program opened itself. */
    if (!quiet) {
        fclose(fp);
        if (fclose(fout) != 0) {
            fprintf(std_err, "clus2blc: error closing block file\n");
            status = EXIT_FAILURE;
        }
    }

    return status;
}
