/* select3  -	1. read output file of scan6 run
		2. read each entry of database on stdin - if entry is in output
		of scan6 run, then save.
		3. sort saved sequences and output to stdout

Author: Geoff Barton 1990
Laboratory of Molecular Biophysics
South Parks Road
Oxford OX1 3QU

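VERBOSE = 1 for interactive mode: the scan output file, database file and
            output file names are prompted for
          0 for command line mode: the scan output file is the 1st argument,
            the database is read from stdin and results go to stdout
SGI = if 1 then include SGI specific comtop routine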

Does not work properly on SGI - seems to miss Four character identifiers...

*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include "array.h"
#include "defaults.h"

/* Build-time switches tested in main() and around comtop().

   VERBOSE  non-zero: main() prints a banner, prompts for the score/ID file,
            the database file and the output file, and reports progress on
            stdout.
            zero: the score/ID file is argv[1], the database is read from
            stdin, results are written to stdout, and the presence of a
            second argument selects the brief (id, score, title) output.

   SGI      1: compile the comtop() variant that takes char * arguments and
            casts them to struct tops *; any other value compiles the
            variant that takes struct tops * directly. */
#define VERBOSE 1
#define SGI 0

/* Size limits held in global ints, initialised from the MAX_* macros that the
   included project headers are expected to supply.  In this file only
   MAXslen (banner text) and MAXtlen (file-name buffer size) are read;
   MAXnseq and MAXilen are not referenced here - presumably they are used by
   other source files linked with this one (TODO confirm). */
int MAXnseq = MAX_NSEQ;
int MAXslen = MAX_SEQ_LEN;
int MAXilen = MAX_ID_LEN;
int MAXtlen = MAX_TITLE_LEN;

    /* One database entry that matched an identifier in the score/ID list.
       main() builds an array of these and sorts it with comres(). */
    struct result {
	int score;	/* score taken from the matching struct tops entry */
	struct seqdat seq;	/* copy of the database entry, filled in by copseq() */
    };

    /* One "score identifier" pair read from the scan output file.  The array
       of these is sorted and searched by identifier using comtop(). */
    struct tops {
	char *id;	/* identifier string; the key compared with strcmp() */
	int score;	/* score read from the same line as the identifier */
    };

/* Forward declarations (old style, without parameter lists) of the helpers
   defined after main(). */
int comres();	/* qsort comparator for struct result: orders by score, highest first */
int comtop();	/* qsort/bsearch comparator for struct tops: orders by id via strcmp() */
void copseq();	/* copies the id, title and sequence of one struct seqdat into another */

main(argc, argv)

int	argc;
char	*argv[];

{
    FILE *fp,*fdb,*fout;
    char *id;
    char *com;
    int score, nseq, count, i, total,j, brief;
    struct seqdat *seqs;
    int lc;
    int end;

    char *idfile,*dbfile,*ofile;

    struct tops *topid, *found, *search;/* id, score pairs required */

    struct result *res;

    com = (char *) malloc(sizeof(char) *10);

    seqs = (struct seqdat *) malloc(sizeof(struct seqdat));

/*    res = (struct result *) malloc(sizeof(struct result));*/

    topid = (struct tops *) 
	    malloc(sizeof(struct tops) * MAX_NSEQ); 
    search = (struct tops *) malloc(sizeof(struct tops));

    nseq = 0;

    if(VERBOSE){
	idfile = (char *) malloc(sizeof(char)*MAXtlen);
	dbfile = (char *) malloc(sizeof(char)*MAXtlen);
	ofile = (char *) malloc(sizeof(char)*MAXtlen);
	printf("\n\nProgram S E L E C T\n\n");
	printf("Extracts sequences from PIR database\n\n");
	printf("Author: G. J. Barton (1990)\n");
	printf("Maximum Allowed Sequence Length: %d\n",MAXslen);
	printf("Maximum Allowed Number of Sequences: %d\n\n",MAX_NSEQ);
	printf("Enter name of file containing SCORE ID pairs: ");
	scanf("%s",idfile);
	printf("\nOpening File: %s\n\n",idfile);
	fp = fopen(idfile,"r");
	if(fp == NULL)error("Cannot open file",1);
	printf("Enter Database Filename: ");
	scanf("%s",dbfile);
	printf("\nOpening File: %s\n\n",dbfile);
	fdb = fopen(dbfile,"r");
	if(fdb == NULL)error("Cannot open file",2);
	printf("Just Extract Identifiers/titles (no sequences) ?[Y/N]: ");
	scanf("%s",com);
	brief=0;
	if(*com=='y'||*com=='Y'){
	    brief=1;
	    printf("Only identifiers and titles will be Output\n");
	}
	printf("\nEnter Output Filename: ");
	scanf("%s",ofile);
	printf("\nOpening File: %s\n\n",ofile);
	fout = fopen(ofile,"w");
	if(fout == NULL)error("Cannot Open file",2);
    }else{
	    if(argc > 1){
		fp = fopen(argv[1],"r");
		brief = (argc > 2) ? 1 : 0;
	    }else{
		fprintf(stderr,"Must Supply scan output file as 1st arg\n");
		exit(1);
	    }
    }

    count = 0;
    total = 0;
    /* get the id list to find in the database */
    while(count < MAX_NSEQ && (topid[count].id = malloc(MAX_ID_LEN)) && 
	fscanf(fp,"%d %s", &score, topid[count].id) != EOF){
	topid[count].id = realloc(topid[count].id,(strlen(topid[count].id)+1));
	topid[count].score = score;
	++count;	
    }
    topid = (struct tops *) realloc(topid, sizeof(struct tops) * count);

    /* sort the id list in ascending order */
    qsort((char *) topid, count, sizeof(struct tops), comtop);	


    if(VERBOSE){
	printf("Searching for: %d Sequences\n",count);
        for(i=0;i<count;++i){
	    printf("%d %s\n",i+1,topid[i].id);
	}
    }
    
    seqs[0].slen = 0;
    while(total < count){
	if(VERBOSE){
	    end = gseq(fdb,seqs,&nseq,2);
	}else{
	    end = gseq(stdin,seqs,&nseq,2);	    
	}
	if(end == 0){
	    fprintf(stderr,"ERROR:  End of Database file\n");
	    exit(0);
	}





	search->id = seqs[nseq].id;
	search->score = 0;

    	found = (struct tops *) 
		bsearch((char *) search, (char *) topid,
			count,sizeof(struct tops), comtop);

/*        if(seqs[nseq].id[0] == 'H' && seqs[nseq].id[1] == 'Z'){
            if(found == NULL)
	    printf("%s %d\n", seqs[nseq].id,strlen(seqs[nseq].id));
	}
*/

	if(found != NULL){
		if(total < 1){
		    res = (struct result *) malloc(sizeof(struct result));
		}else{
		    res = (struct result *) 
                          realloc(res,sizeof(struct result) * (total+1));
		}

/*		res[total].seq = *seqs;*/
		copseq(&res[total].seq,seqs);
		res[total].score = found->score;
		if(VERBOSE){
		    printf("Found: %s %5d\n",found->id,total+1);
		}

		++total;
	}
    }

    fclose(fp);
    qsort((char *) res, total, sizeof(struct result),comres);

    if(VERBOSE){
	printf("Extracted: %d Sequences\n",total);
    }

    if(!brief){
	if(VERBOSE){
	    for(j=0; j<total; ++j){
	        lc = 0;
		fprintf(fout,">%s\n",res[j].seq.id);
		fprintf(fout,"%s\n",res[j].seq.title);
		for(i = 1; i < res[j].seq.slen-1; ++i){
		    ++lc;
		    if(lc == 50){
		      fprintf(fout,"\n");
		      lc = 0;
		    }
		    fputc(res[j].seq.seq[i],fout);
		}
		fprintf(fout,"*\n");
	    }
	}else{
	    for(j=0; j<total; ++j){
		fprintf(stdout,">%s\n",res[j].seq.id);
		fprintf(stdout,"%s\n",res[j].seq.title);
		for(i = 1; i < res[j].seq.slen-1; ++i){
		    ++lc;
		    if(lc == 50){
		      fprintf(stdout,"\n");
		      lc = 0;
		    }
		    fputc(res[j].seq.seq[i],stdout);
		}
		fprintf(stdout,"*\n");
	    }
	}
    }else{
	if(VERBOSE){
	    for(j=0; j<total; ++j){
		fprintf(fout,"%s\t",res[j].seq.id);
		fprintf(fout,"%d\t",res[j].score);
		fprintf(fout,"%s\n",res[j].seq.title);
	    }
	}else{
	    for(j=0; j<total; ++j){
		fprintf(stdout,"%s\t",res[j].seq.id);
		fprintf(stdout,"%d\t",res[j].score);
		fprintf(stdout,"%s\n",res[j].seq.title);
	    }
	}
    }
}

/*
 * comres - qsort comparator for struct result, ordering by score with the
 * highest score first.
 *
 * Returns 1 when right's score is greater than left's, -1 when it is
 * smaller, and 0 when the two are equal.  The scores are compared rather
 * than subtracted because the difference of two ints can overflow, which
 * would give the wrong sign for scores of large magnitude and opposite sign.
 */
int comres(left,right)

struct result *left, *right;

{
    if(right->score > left->score)return 1;
    if(right->score < left->score)return -1;
    return 0;
}

/*
 * comtop - comparator for struct tops, used with qsort() and bsearch().
 *
 * Orders two entries by their id strings and returns the strcmp() result
 * (negative, zero or positive).  Two variants exist, chosen by the SGI
 * macro: with SGI == 1 the arguments are declared as char * and cast to
 * struct tops * inside the function; otherwise they are declared as
 * struct tops * directly.
 */
#if SGI == 1
int comtop(left,right)

char *left, *right;

{
    struct tops *first, *second;

    first = (struct tops *) left;
    second = (struct tops *) right;
    return strcmp(first->id, second->id);
}
#else
int comtop(left,right)

struct tops  *left, *right;

{
    int order;

    order = strcmp(left->id, right->id);
    return order;
}
#endif


/*
 * copy_field - return a newly allocated copy of the first `len` bytes of
 * `src`.
 *
 * The buffer is len+1 bytes long and zero-filled before copying, so the
 * result always ends in a '\0' whether or not `len` counted one.  A negative
 * `len` is treated as 0; a NULL `src` leaves the buffer all zeros.  Prints
 * an error and exits with status 1 if memory cannot be allocated.
 */
static char *copy_field(src, len)
char *src;
int len;
{
    char *dst;

    if(len < 0)len = 0;
    dst = (char *) calloc((size_t) len + 1,sizeof(char));
    if(dst == NULL){
	fprintf(stderr,"ERROR:  Out of memory copying sequence entry\n");
	exit(1);
    }
    if(src != NULL && len > 0){
	memcpy(dst,src,(size_t) len);
    }
    return dst;
}

/*
 * copseq - copy one sequence entry into another.
 *
 * to   - destination; its ilen/tlen/slen and id/title/seq members are
 *        overwritten.  Pointers previously held in `to` are not freed, so it
 *        may be uninitialised storage.
 * from - source entry; not modified.
 *
 * The three length members are copied unchanged and id, title and seq each
 * receive their own freshly allocated copy of that many bytes.  Every copy
 * carries one extra terminating '\0', because main() prints id and title
 * with %s.  The caller owns the new buffers.
 */
void copseq(to, from)
struct seqdat *to,*from;
{
    to->ilen = from->ilen;
    to->id = copy_field(from->id,from->ilen);

    to->tlen = from->tlen;
    to->title = copy_field(from->title,from->tlen);

    to->slen = from->slen;
    to->seq = copy_field(from->seq,from->slen);
}
