#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "array.h"

#include "includes.h"

/*
11th June 1992.
Agetbloc:  like getbloc, but does not require that every character read into
the seqs structure is an alphabetic character.  Also does not contain the 
option to convert the "sequences" read in into integer format


getbloc:  Read an AMPS style block file into the seqs array
the nbloc aligned sequences are stored in positions 1-nbloc.

This is a straight-ish translation of the fortran routine fbloc.f, hence
the non-C like goto's...

Sequence lengths are actual length +1 + 1.  This allows position 0 to 
be reserved for future use, and preserves the '\0' for output.

Personalised by RBR October 1992

Additional modifications by CDL April 1993

*/
/*
 * Allocate or resize a buffer for getbloc; running out of memory is fatal.
 * ptr may be NULL, in which case this behaves like malloc.
 */
static void *getbloc_xrealloc(void *ptr, size_t nbytes)
{
    void *mem = realloc(ptr, nbytes ? nbytes : 1);

    if (mem == NULL) {
        printf("\nMemory Error: out of memory while reading block file.\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/*
 * getbloc: read one block of aligned sequences from bfile into bloc[].
 *
 * Input layout, as parsed here:
 *   - Lines are skipped until one contains '>'.  Each line containing '>'
 *     starts a new sequence entry (entries are numbered from 1):
 *       id    = the text after '>' up to AND INCLUDING the first blank
 *               (or up to the end of the line if there is no blank);
 *       ilen  = strlen(id) when a blank was found, strlen(id)+1 otherwise;
 *       title = the rest of the line starting at that blank;
 *       tlen  = strlen(title)+1.
 *   - Once at least one '>' line has been seen, the first line containing
 *     '*' (and no '>') starts the alignment.  The column of that '*' is
 *     the column at which every following data line is read.
 *   - Each following line is one alignment column: the character at the
 *     start column goes to sequence 1, the next to sequence 2, and so on.
 *     A line with '*' in the start column ends the block.
 *
 * Results:
 *   bloc[k].seq[0]         = ' '            (k = 1..*nbloc)
 *   bloc[k].seq[1..*count] = alignment characters
 *   bloc[k].seq[*count+1]  = '\0'
 *   bloc[k].slen           = *count + 1
 *   *nbloc                 = number of sequences read
 *   *count                 = number of alignment columns read
 *
 * bfile    - open stream positioned before the block
 * bloc     - array that receives the entries; must be able to hold
 *            indices 1..MAXnbloc-1
 * blocfile - file name, used only in error messages
 *
 * Returns 1 on success.  Every error prints a message and terminates the
 * process.  The pre-existing file-format errors keep their historical
 * exit status of 0; the checks added here (out of memory, sequence longer
 * than MAXslen) exit with EXIT_FAILURE.
 *
 * Differences from the earlier implementation:
 *   - data lines shorter than the number of sequences are padded with
 *     blanks instead of picking up bytes left in the buffer by previous
 *     lines, and such a line can no longer be mistaken for the terminator;
 *   - more than MAXslen columns is fatal instead of overrunning seq[];
 *   - allocation failures are detected.
 *
 * NOTE(review): id keeps the delimiter that follows the identifier and
 * ilen counts it.  This looks like an off-by-one, but it is left unchanged
 * because callers may depend on it.
 */
int getbloc(FILE *bfile, struct seqdat *bloc, int *nbloc, char *blocfile, int *count)
{
    char *buff;
    char *idstart = NULL;
    char *idend;
    int sident = 0;     /* non-zero once a '>' line has been seen */
    int totseq = 0;     /* number of '>' entries read so far */
    int ncols = 0;      /* number of alignment columns read so far */
    int idlen;
    int j;
    size_t startcol;    /* column of the '*' that opened the alignment */
    size_t linelen;     /* length of the current line without CR/LF */
    size_t pos;

    buff = getbloc_xrealloc(NULL, sizeof(char) * MAXtlen);

    /* Phase 1: identifier/title lines, up to the opening '*' line. */
    for (;;) {
        if (fgets(buff, MAXtlen, bfile) == NULL) {
            printf("\nFile Error: premature end of blocfile \"%s\".\n", blocfile);
            exit(0);
        }

        idstart = strchr(buff, '>');
        if (idstart == NULL) {
            /* A '*' only counts after at least one identifier line. */
            if (sident && (idstart = strchr(buff, '*')) != NULL) {
                break;
            }
            continue;
        }

        if (++totseq == MAXnbloc) {
            printf("\nFile Error: maximum number of");
            printf("sequences exceeded in file \"%s\".\n", blocfile);
            exit(0);
        }
        sident = 1;

        /* The identifier ends at the first blank, or at the end of line. */
        idend = strchr(idstart, ' ');
        if (idend == NULL) {
            idend = idstart + strlen(idstart);
        }
        idlen = (int)(idend - idstart) + 1;

        /*
         * Copy everything after '>' up to and including *idend (a blank
         * or the line's NUL), then terminate.
         */
        bloc[totseq].id = getbloc_xrealloc(NULL, (size_t)idlen);
        memcpy(bloc[totseq].id, idstart + 1, (size_t)(idlen - 1));
        bloc[totseq].id[idlen - 1] = '\0';
        bloc[totseq].ilen = idlen - 1;

        bloc[totseq].tlen = strlen(idend) + 1;
        bloc[totseq].title = getbloc_xrealloc(NULL, strlen(idend) + 1);
        strcpy(bloc[totseq].title, idend);

        /* Position 0 of every sequence is a reserved blank. */
        bloc[totseq].seq = getbloc_xrealloc(NULL, sizeof(char) * (MAXslen + 1));
        bloc[totseq].seq[0] = ' ';
    }

    /* Phase 2: one alignment column per line until the closing '*'. */
    startcol = (size_t)(idstart - buff);

    while (fgets(buff, MAXtlen, bfile) != NULL) {
        linelen = strcspn(buff, "\r\n");

        if (startcol < linelen && buff[startcol] == '*') {
            *count = ncols;
            for (j = 1; j <= totseq; ++j) {
                /* Trim (or grow by one byte) to blank + columns + NUL. */
                bloc[j].seq = getbloc_xrealloc(bloc[j].seq,
                                               sizeof(char) * ((size_t)ncols + 2));
                bloc[j].slen = ncols + 1;
                bloc[j].seq[ncols + 1] = '\0';
            }
            *nbloc = totseq;
            free(buff);
            return 1;
        }

        ++ncols;
        if (ncols == MAXslen) {
            /* seq[MAXslen] is the last slot that fits; warn as before. */
            printf("Max Sequence length exceeded - use MAX_SEQ_LEN command to increase");
        } else if (ncols > MAXslen) {
            printf("\nFile Error: sequence length exceeds the maximum in file \"%s\".\n",
                   blocfile);
            exit(EXIT_FAILURE);
        }

        for (j = 1; j <= totseq; ++j) {
            /* Cope with short lines: pad with blanks past the line end. */
            pos = startcol + (size_t)(j - 1);
            bloc[j].seq[ncols] = (pos < linelen) ? buff[pos] : ' ';
        }
    }

    printf("File Error: no terminating \"*\" in blocfile \"%s\".\n",
           blocfile);
    exit(0);
}

/*
 * GJstrblank: set a string to blanks and add the terminating NUL.
 *
 * string - buffer of at least len bytes
 * len    - total size of the buffer, including the terminating NUL
 *
 * Sets string[0..len-2] to ' ' and string[len-1] to '\0', then returns
 * string.  A NULL string or a len below 1 leaves the buffer untouched and
 * returns string unchanged; the earlier implementation wrote to
 * string[len-1] in that case, i.e. in front of the buffer.
 */
char *GJstrblank(char *string, int len)
{
  if (string == NULL || len < 1) {
    return string;
  }
  memset(string, ' ', (size_t)(len - 1));
  string[len - 1] = '\0';
  return string;
}
  

 
