/****************************************************************************

 AGETBLOC - a routine to read an AMPS block file

   Copyright:  University of Oxford (1992)

TERMS OF USE:

The computer software and associated documentation called ALSCRIPT hereinafter
referred to as the WORK is more particularly identified and described in 
Appendix A.

The WORK was written and developed by: Geoffrey J. Barton

Laboratory of Molecular Biophysics
University of Oxford
Rex Richards Building
South Parks Road
Oxford OX1 3QU
U.K.

Tel:  (+44) 865-275368
Fax:  (+44) 865-510454

Internet: gjb@bioch.ox.ac.uk
Janet:    gjb@uk.ac.ox.bioch

The WORK is Copyright (1992) University of Oxford

Administrative Offices
Wellington Square
Oxford OX1 2JD
U.K.

CONDITIONS:

The WORK is made available for educational and non-commercial research 
purposes.

For commercial use, a commercial licence is required - contact the author
at the above address for details.

The WORK may be modified, however this text, all copyright notices and
the authors' address must be left unchanged on every copy, and all
changes must be documented after this notice.  A copy of the
modified WORK must be supplied to the author.

All use of the WORK must cite:  Barton, G. J. (1993), ALSCRIPT: A Tool to
Format Multiple Sequence Alignments, Protein Engineering, Volume 6, No. 1, 
pp. 37-40.

APPENDIX A:

The program package known as ALSCRIPT is made up of the following files:

README     	This copyright notice
EXAMPLE.BLC     Example block file
EXAMPLE.COM     Example command file
EXAMPLE1.BLC    Example block file
EXAMPLE1.COM    Example command file
EXAMPLE2.COM    Example command file
EXAMPLE3.COM    Example command file
ALSCRIPT.EXE    ALSCRIPT executable program
MSF2BLC.EXE     GCG MSF to BLOCK file conversion program
CLUS2BLC.EXE    CLUSTAL PIR file to BLOCK file conversion program
EXAMPLE1.PS     Example output of program
ALSCRIPT.DOC    Documentation
MAKEFILE        Makefile for programs
MAKEFILE.GCC    Makefile for GCC compiler
MAKEFILE.SGI    Makefile for Silicon Graphics 
MAKEFILE.WAT    Makefile for WATCOM 386 C compiler
AGETBLOC.C      Source code for block file reading routine
ALPS.C          Source code for main ALSCRIPT routines
ARRAY.H         Header file
CLUS2BLC.C      Source code for CLUSTAL PIR file to BLOCK file conversion program
DEFAULTS.H      Header file
GJUTIL.C        Source code for utility routines
GJUTIL.H        pHeader file for utility routines
MSF2BLC.C       Source code for GCG MSF to BLOCK file conversion program
MAKEFILE.SUN    Makefile for Sun acc compiler.
ALSCRIPT.C      Source code for ALSCRIPT main program


****************************************************************************

History:

15th November 1992 - ANSI C version - also uses GJ... routines.
This version adapted with error messages for alscript.

11th June 1992.
Agetbloc:  like getbloc, but does not require that every character read into
the seqs structure is an alphabetic character.  Also does not contain the 
option to convert the "sequences" read in into integer format

getbloc:  Read an AMPS style block file into the seqs array
the nbloc aligned sequences are stored in positions 1-nbloc.

This is a straight-ish translation of the fortran routine fbloc.f, hence
the non-C like goto's...

Sequence lengths are actual length +1 + 1.  This allows position 0 to 
be reserved for future use, and preserves the '\0' for output.

18/Feb/1993:Fix (i) to (i+1) in realloc.  Spotted by RBR.

*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "array.h"
#include "gjutil.h"


/*
 * Agetbloc - read an AMPS style block file from bfile into bloc[1..*nbloc].
 *
 * File layout, as parsed here:
 *   - Every line containing '>' (read before the alignment starts) begins a
 *     new sequence entry.  The text after '>' up to and including the first
 *     space becomes the id; the text from that space to the end of the line
 *     becomes the title.  If there is no space, the rest of the line
 *     (newline included) is the id and the title is empty.
 *   - The first line containing '*' that follows at least one '>' line
 *     starts the alignment.  The column of that '*' is the column at which
 *     sequence 1 is found on each following line.
 *   - Each following line is one alignment position: the character at
 *     column (star column + j - 1) belongs to sequence j.
 *   - A line with '*' in the star column ends the alignment.
 *
 * On success bloc[k].seq (k = 1..*nbloc) holds ' ' at index 0, the aligned
 * characters at indices 1..n and a terminating '\0' at index n+1;
 * bloc[k].slen is n+1.  bloc[0] is not touched.
 *
 * Parameters:
 *   bfile - open stream positioned at the start of the block file
 *   bloc  - caller supplied array; entries 1.. are filled in
 *   nbloc - receives the number of sequences read (only set on success)
 *
 * Returns 1 on success, 0 on failure (a message is written to std_err).
 * Entries of bloc already allocated when a failure occurs are left as they
 * are; the line buffer used internally is always released.
 */
int Agetbloc(FILE *bfile,struct seqdat *bloc,int *nbloc)

{
    extern int MAXslen, MAXtlen, MAXnbloc;
    extern FILE *std_err;

    char *buff;
    char *idstart, *idend, *resized;
    size_t col, linelen, pos;
    int idlen, totseq = 0, i = 0, k;
    int sident = 0;          /* set once at least one '>' line has been seen */
    int status = 0;          /* value returned; becomes 1 only on success */
    char c;

    buff = (char *) GJmalloc(sizeof(char) * MAXtlen);

    /*
     * Phase 1: identifier lines, up to the '*' line that opens the alignment.
     * The fgets() result is tested but never assigned to buff, so the buffer
     * can still be freed when end of file is reached.
     */
    for(;;){
        if(fgets(buff,MAXtlen,bfile) == NULL){
            fprintf(std_err,"Premature end of BLOCK FILE\n");
            goto done;
        }

        idstart = strchr(buff,'>');
        if(idstart != NULL){
            if(++totseq == MAXnbloc){
                fprintf(std_err,
                "Max Number of block file sequences exceeded: %d\n",
                totseq);
                fprintf(std_err,"Use MAX_NSEQ command to increase value");
                goto done;
            }
            sident = 1;

            /* The id ends at the first space, or at the end of the line. */
            idend = strchr(idstart,' ');
            if(idend == NULL){
                idend = idstart + strlen(idstart);
            }
            idlen = (int) (idend - idstart) + 1;

            /* GJmalloc is used here like every other allocation in this routine. */
            bloc[totseq].id = (char *) GJmalloc(sizeof(char) * idlen);
            bloc[totseq].id = GJstrblank(bloc[totseq].id,idlen);
            strncpy(bloc[totseq].id,idstart+1,idlen-1);   /* don't copy the ">" symbol */
            bloc[totseq].ilen = idlen-1;
            bloc[totseq].id[idlen-1] = '\0';

            bloc[totseq].tlen = strlen(idend)+1;
            bloc[totseq].title = (char *) GJmalloc(sizeof(char) * bloc[totseq].tlen);
            bloc[totseq].title = GJstrblank(bloc[totseq].title,bloc[totseq].tlen);
            strcpy(bloc[totseq].title,idend);

            /* Position 0 of every sequence is a reserved blank. */
            bloc[totseq].seq = (char *) GJmalloc(sizeof(char) * MAXslen);
            bloc[totseq].seq[0] = ' ';
            continue;
        }

        if(sident){
            idstart = strchr(buff,'*');
            if(idstart != NULL){
                break;
            }
        }
    }

    /* Column of the opening '*': sequence 1 lives here on every data line. */
    col = (size_t) (idstart - buff);

    /* Phase 2: one alignment position per line until the closing '*'. */
    while(fgets(buff,MAXtlen,bfile) != NULL){
        linelen = strlen(buff);

        /*
         * Only look at the star column if this line actually reaches it;
         * otherwise the byte there is left over from an earlier line.
         */
        if(col < linelen && buff[col] == '*'){
/*          fprintf(stdout,"Blocfile read: Length: %d\n",i);*/
            ++i;
            for(k=1;k<totseq+1;++k){
                /*
                 * Resize before writing the terminator: i can equal MAXslen
                 * here, in which case index i only exists after the resize.
                 */
                resized = (char *) realloc(bloc[k].seq,sizeof(char)*(i+1));
                if(resized != NULL){
                    bloc[k].seq = resized;
                }else if(i >= MAXslen){
                    fprintf(std_err,"Out of memory resizing sequence %d\n",k);
                    goto done;
                }
                /* If a shrinking realloc failed the old block is still valid. */
                bloc[k].slen = i;
                bloc[k].seq[i] = '\0';
            }
            *nbloc = totseq;
            status = 1;
            goto done;
        }

        ++i;
        if(i >= MAXslen){
            error("Max Sequence length exceeded - use MAX_SEQ_LEN command to increase",1);
            goto done;      /* only reached if error() returns */
        }

        /*
         * Cope with short lines: columns at or beyond the end of the line
         * (including the line terminator itself) are stored as blanks rather
         * than as whatever an earlier, longer line left in the buffer.
         */
        pos = col;
        for(k=1;k<totseq+1;++k){
            c = ' ';
            if(pos < linelen){
                c = buff[pos];
                if(c == '\n' || c == '\r'){
                    c = ' ';
                }
            }
            bloc[k].seq[i] = c;
            ++pos;
        }
    }
    fprintf(std_err,"No terminating * in blocfile\n");

done:
    free(buff);
    return status;
}
 
