/******************************************************************************
 The computer software and associated documentation called DOMAK hereinafter
 referred to as the WORK which is more particularly identified and described in
 Appendix A of the file LICENSE.  Conditions and restrictions for use of
 this package are also in this file.

 This routine was developed by Robert B. Russell


 All use of the WORK must cite:
 Siddiqui, A. S. and Barton, G. J., "Continuous and Discontinuous Domains: An
 Algorithm for the Automatic Generation of Reliable Protein Domain Definitions" 
 PROTEIN SCIENCE, 4:872-884 (1995).
*****************************************************************************/

#include <stdio.h>
#include <string.h>
#include <rbr_newlib.h>
#include <rdssp.h>
#include <rbr_domain.h>

/* RBR_getdomain2: reads domain descriptors from IN, one per call to
 *  RBR_define2, storing them in domains[0], domains[1], ...
 *
 *  IN         open domain specification file
 *  domains    caller-supplied array with room for at least maxdomain entries
 *  ndomain    on success receives the number of entries stored
 *  maxdomain  capacity of domains
 *  gottrans   set to 1 if RBR_define2 reported a transformation for any
 *             entry read, otherwise 0
 *  env        base directory handed on to RBR_define2
 *  filetype   0 = look for PDB files, 1 = look for DSSP files,
 *             2 = don't look for files
 *  OUTPUT     stream that receives error messages
 *
 *  Reading stops at end of file or, silently, once maxdomain entries have
 *  been stored; anything left in IN after that is not examined.
 *
 *  Returns 0 on success.  Returns -1 if maxdomain is smaller than 1, if
 *  RBR_define2 reports an error, or if two entries share an identifier;
 *  *ndomain is not written in the first two cases. */
int RBR_getdomain2(IN,domains,ndomain,maxdomain,gottrans,env,filetype,OUTPUT)
FILE *IN;
struct domain_loc *domains;
int *ndomain;
int maxdomain;
int *gottrans;
char *env;
int filetype;	/* 0 = look for PDB files, 1 = look for DSSP files, 2 = don't look for files */
FILE *OUTPUT;
{
	int RBR_define2();	/* defined further down in this file */

	int i,j;
	int count;	/* entries stored so far */
	int status;	/* last RBR_define2 result: 0 ok, 1 EOF, -1 error */
	int trans;	/* transformation flag for the entry just read */

	(*gottrans)=0;

	/* without at least one slot even the first descriptor would be
	 *  written outside the caller's array */
	if(maxdomain<1) {
	  fprintf(OUTPUT,"error have exceeded maximum domain limit\n");
	  return -1;
	}

	count=0;
	status=0;
	while(status==0 && count<maxdomain) {
	  trans=0;
	  status=RBR_define2(&domains[count],&trans,env,filetype,IN,OUTPUT);
	  if(trans==1) (*gottrans)=1;
	  if(status==-1) {
	     fprintf(OUTPUT,"error in domain specification file\n");
	     return -1;
	  }
	  if(status==0) count++;
	} /* end while(status==0... */
	(*ndomain)=count;

	/* check for duplication */
	for(i=0; i<count; ++i) {
	   for(j=i+1; j<count; ++j) {
	      if(strcmp(domains[i].id,domains[j].id)==0) {
		 fprintf(OUTPUT,"error: domain identifiers must not be the same\n");
		 fprintf(OUTPUT,"       found two copies of %s, domains %d & %d\n",
			domains[i].id,i+1,j+1);
		 return -1;
	      }
	   }
	}

	return 0;
}

int RBR_define2(domain,gottrans,env,filetype,INPUT,OUTPUT)
struct domain_loc *domain;
int *gottrans;
char *env;
int filetype;
FILE *INPUT;
FILE *OUTPUT;
/* reads in the next domain descriptor from a supplied input file 
 * returns 0 if all is well, -1 if an error occurs, 1 if EOF occurs */
{

	int i,j,k;
	int nobjects;
	int comment;

 	char c;
	char *index;
	char *descriptor;
	char *buff,*add_buff;
	char *temp;
	char *dirfile;
	
	FILE *TEST;


	buff=(char*)malloc(2000*sizeof(char));
	dirfile=(char*)malloc(2000*sizeof(char));
	add_buff=buff;
	descriptor=(char*)malloc(2000*sizeof(char));
	
	if(filetype==1) sprintf(dirfile,"%s/dssp_files",env);
	else if(filetype==0) sprintf(dirfile,"%s/pdb_files",env);

	comment=1;
	(*gottrans)=0;
	buff[0]='%';
	while(buff[0]=='%' || buff[0]=='#') {
	 i=0; 
	 while((c=getc(INPUT))!=(char)EOF && c!='\n')  {
	    if(i>2000) {
	      fprintf(OUTPUT,"error: line length in domain file exceeds memory limit\n");
	      return -1;
	    }
	    buff[i++]=c;
	 }
	 if(c==(char)EOF) { free(add_buff); return 1; }
	 buff[i]='\0';
	}
	/* read in domain */
	sscanf(buff,"%s",&domain[0].filename[0]); /* read the filename */
	index=strchr(buff,' ');
	sscanf(index,"%s",&domain[0].id[0]);	/* read the identifier */

	/* check to see whether the file exists, otherwise, look for a file
	 *  with a similar ID */
	if(filetype==0 || filetype==1) {
	  if((TEST=fopen(domain[0].filename,"r"))==NULL) {
	    /* look for the file */
	    temp=RBR_getfile(domain[0].id,dirfile,4,OUTPUT); /* assume the first four characters are the id */
	    if(temp[0]=='\0') {
	      fprintf(OUTPUT,"%% file for %s not found, nor was any corresponding file\n",domain[0].id);
	      fprintf(OUTPUT,"%%   found\n");
	      return(-1); 
	    } else {
	      strcpy(&domain[0].filename[0],temp);
	    }
	    free(temp);
	  }
	  fclose(TEST);
	}

	index=strchr(buff,' ');
	/* copy the bit between the braces into the string called descriptor */
	i=0; while(buff[i]!='{') i++;
	i++;
	j=0; while(buff[i]!=(char)EOF && buff[i]!='\n' && buff[i]!='}' && buff[i]!='\0') {
	   descriptor[j]=buff[i];
	   i++; 
	   j++;
	}
	descriptor[j]='\0';
/*	printf("descriptor= %s\n",descriptor); */
	if(buff[i]==(char)EOF) {
	  fprintf(OUTPUT,"error: end of file encountered too early\n");
	  return -1;
	}
	/* allocation of memory, initially */
	domain[0].reverse=(int*)malloc(sizeof(int));
	domain[0].type=(int*)malloc(sizeof(int));
	domain[0].start=(struct brookn*)malloc(sizeof(struct brookn));
	domain[0].end=(struct brookn*)malloc(sizeof(struct brookn)); 
	domain[0].length=(int*)malloc(sizeof(int));
	domain[0].V=(float*)malloc(3*sizeof(float));
	domain[0].v=(float*)malloc(3*sizeof(float));
	domain[0].R=(float**)malloc(3*sizeof(float*));
	domain[0].r=(float**)malloc(3*sizeof(float*));
	for(i=0; i<3; ++i) {
	   domain[0].R[i]=(float*)malloc(3*sizeof(float));
	   domain[0].r[i]=(float*)malloc(3*sizeof(float));
	   for(j=0; j<3; ++j) 
	     if(i==j) domain[0].R[i][j]=domain[0].r[i][j]=1.0;
	     else domain[0].R[i][j]=domain[0].r[i][j]=0.0;
	     domain[0].V[i]=domain[0].v[i]=0.0;
	}

	nobjects=0;
	for(i=0; i<strlen(descriptor); ++i) descriptor[i]=RBR_ltou(descriptor[i]);
	index=descriptor; /* get to one after opening '{' */
	if(index==NULL) return -1;
	while(index[0]!='\0' ) { /* read until end of string */
	   while(index[0]==' ') index++; /* get to next non space */
	   if(strncmp(index,"REVERSE",7)==0) { /* coordinates are to be reversed */
		 domain[0].reverse[nobjects]=1;
		 index=RBR_skiptononspace(index,OUTPUT);
	    } else {
		 domain[0].reverse[nobjects]=0;
		 /* don't skip over the text if the word "REVERSE" isn't there */
	      }
	      if(strncmp(index,"ALL",3)==0) {  /* want all the coordinates in the file */
		 domain[0].type[nobjects]=1;
		 domain[0].start[nobjects].cid=domain[0].start[nobjects].in=
		     domain[0].end[nobjects].cid=domain[0].end[nobjects].in='?';
	 	 domain[0].start[nobjects].n=domain[0].end[nobjects].n=0;
		 index=RBR_skiptononspace(index,OUTPUT);
	      } else if(strncmp(index,"CHAIN",5)==0) { /* want specific chain only */
		 domain[0].type[nobjects]=2;
		 if((index=RBR_skiptononspace(index,OUTPUT))==NULL) return -1;
		 domain[0].start[nobjects].cid=domain[0].end[nobjects].cid=index[0];
		 domain[0].start[nobjects].in=domain[0].end[nobjects].in='?';
		 domain[0].start[nobjects].n=domain[0].end[nobjects].n=0;
		 index=RBR_skiptononspace(index,OUTPUT);
	      } else { /* assume that otherwise a specific start and end will be provided */
		 domain[0].type[nobjects]=3;
		 if(index[0]=='_') domain[0].start[nobjects].cid=' '; 
		 else domain[0].start[nobjects].cid=(*index);
		 if((index=RBR_skiptononspace(index,OUTPUT))==NULL) return -1;
		 sscanf(index,"%d",&domain[0].start[nobjects].n);
		 if((index=RBR_skiptononspace(index,OUTPUT))==NULL) return -1;
		 if(index[0]=='_') domain[0].start[nobjects].in=' ';
		 else domain[0].start[nobjects].in=(*index);
		 if((index=RBR_skiptononspace(index,OUTPUT))==NULL) return -1;
		 if((index=RBR_skiptononspace(index,OUTPUT))==NULL) return -1;
		 if(index[0]=='_') domain[0].end[nobjects].cid=' ';
		 else domain[0].end[nobjects].cid=(*index);
		 if((index=RBR_skiptononspace(index,OUTPUT))==NULL) return -1;
		 sscanf(index,"%d",&domain[0].end[nobjects].n);
		 if((index=RBR_skiptononspace(index,OUTPUT))==NULL) return -1;
		 if(index[0]=='_') domain[0].end[nobjects].in=' ';
		 else domain[0].end[nobjects].in=(*index);
		 index=strchr(index,' ');
		 if(index!=NULL) while(index[0]==' ') index++;
	      } 
	      nobjects++;
	      /* reallocing if necessary */
	      if(index!=NULL && index[0]!='}' && index[0]!='\n') {
		domain[0].reverse=(int*)realloc(domain[0].reverse,(nobjects+1)*sizeof(int));
		domain[0].type=(int*)realloc(domain[0].type,(nobjects+1)*sizeof(int));
	   	domain[0].start=(struct brookn*)realloc(domain[0].start,(nobjects+1)*sizeof(struct brookn));
		domain[0].end=(struct brookn*)realloc(domain[0].end,(nobjects+1)*sizeof(struct brookn));
	        domain[0].length=(int*)realloc(domain[0].length,(nobjects+1)*sizeof(int));
	      }
	      /* now either stop, or move onto the next descriptor */
           } /* end of while((*index... */
	   
	   if(strchr(buff,'}')==NULL) { /* if no close brace is found on the line, it is assumed a transformation is given */
	     (*gottrans)=1;
	     for(i=0; i<3; ++i) {
		for(j=0; j<3; ++j) 
		   if(fscanf(INPUT,"%f",&domain[0].R[i][j])==(int)EOF) {
		      fprintf(OUTPUT,"error: transformation missing in domain file\n");
		      return -1;
	           }
		if(fscanf(INPUT,"%f",&domain[0].V[i])==(int)EOF) {
		   fprintf(OUTPUT,"error: transformation missing in domain file\n");
		   return -1;
		} /* end of if(fscan... */
	     } /* end of for(i... */
	     while((c=getc(INPUT))!='\n'); /* read till the end of the line */
	   }  /* end of if(strchr(buff,'}'... */
	   domain[0].nobj=nobjects;
	   free(add_buff);
	   free(dirfile);
	   free(descriptor);
	   return 0;
}

/* RBR_skiptononspace: moves on to the start of the next blank-separated word.
 *
 *  Finds the first blank at or after index and returns a pointer to the
 *  first character after the run of blanks that starts there.  That
 *  character is the string terminator when only blanks follow.
 *
 *  index   string to search; NULL is treated like a string with no blank
 *  OUTPUT  stream that receives the error message
 *
 *  Returns NULL, after writing "error in domain descriptors" to OUTPUT,
 *  when there is no blank to skip to. */
char *RBR_skiptononspace(index,OUTPUT)
char *index;
FILE *OUTPUT;
{
	char *next;

	next=(index!=NULL)?strchr(index,' '):NULL;
	if(next==NULL) {
	   fprintf(OUTPUT,"error in domain descriptors\n");
	   return NULL;
	}
	while(next[0]==' ') next++;
	return next;
}
