/******************************************************************************
 The computer software and associated documentation called DOMAK hereinafter
 referred to as the WORK which is more particularly identified and described in
 Appendix A of the file LICENSE.  Conditions and restrictions for use of
 this package are also in this file.

 This routine was written by Asim S. Siddiqui

 The WORK is Copyright (1995) A. S. Siddiqui and G. J. Barton

 All use of the WORK must cite:
 Siddiqui, A. S. and Barton, G. J., "Continuous and Discontinuous Domains: An
 Algorithm for the Automatic Generation of Reliable Protein Domain Definitions" 
 PROTEIN SCIENCE, 4:872-884 (1995).
*****************************************************************************/

/* $Id: domak.c,v 1.6 2004/09/10 14:38:58 geoff Exp $ */

/* 
 * Title
 *     domak.c
 * Purpose
 *     contains the main part of the DO-M-AK program
 * SccsId:
 *     %W%   %U%   %E%
 */

#include <stdlib.h>
#include <unistd.h>
#include "array.h"
#include <stdio.h>
#include <rdssp.h>
#include <asd_structs.h>
#include <string.h>
#include <gjutil.h>
#include <asstr_util.h>
#include <ass_stamp_utils.h>
#include <asd_make_domains.h>
#include <asd_contacts_utils.h>
#include <rbr_newlib.h>
#include <asd_get_params.h>

/*
 * Parameter set shared with the rest of the program; only declared here,
 * the definition lives outside this file.  main() fills it with default
 * values before the domain analysis is run.
 */
extern Asd_Parameters params;

/*
 * Prints a diagnostic about a data file that could not be located for a
 * PDB code and terminates the program (defined at the end of this file).
 */
void file_open_error(char *code_id,char *env,char *file);

/*
 * Entry point of the DOMAK program.
 *
 * Command line:
 *     domak -c<PDB code> [-d<chain>] [-p<parameters file>] [-o<outputfile>]
 *
 * Outline:
 *   1. require the DOMAKDIR environment variable;
 *   2. parse the command-line options;
 *   3. load the built-in default parameters into `params`, then, if -p was
 *      given, pass the parameters file to asd_get_params();
 *   4. echo the parameters in use to stdout;
 *   5. look up the dssp, contacts and pdb files for the code with
 *      RBR_getfile() under $DOMAKDIR;
 *   6. read the data with asd_read_all_info(), compute the domains with
 *      asd_work_out_domains() and print them with asd_print_domains() to
 *      stdout, to the output file (opened in append mode, default
 *      "output.res") and to "<code>.rasmol".
 *
 * Return value / exit status:
 *   0  normal completion
 *   1  no command-line arguments were given
 *   2  DOMAKDIR is unset, no -c option was given, or a data file could not
 *      be located (the last one through file_open_error())
 *  -1  (via exit) malformed options or a PDB code too long for code_id
 */
int main(int argc, char *argv[])
{
    FILE *test;                          /* temporary file pointer  */
    FILE *output_fptr;                   /* output file pointer     */
    FILE *rasmol_fptr;                   /* rasmol file pointer     */
    int opt;                             /* command-line option; int because
                                            getopt() returns int and signals
                                            the end of options with -1 */
    int num_atoms;                       /* number of atoms         */
    int j;                               /* loop counter            */
    int got_code;                        /* non-zero once -c seen   */
    int get_params;                      /* non-zero once -p seen   */
    char *code;                          /* pdb code followed by "-" */
    char code_id[7];                     /* pdb code                */
    char *output_file_name;              /* output file name        */
    char *rasmol_file_name;              /* rasmol file name        */
    char *dssp_file_name;                /* dssp file name          */
    char *contacts_file_name;            /* contacts file name      */
    char *pdb_file_name;                 /* pdb file name           */
    char *parameters_file_name = NULL;   /* parameters file name    */
    char *ss;                            /* secondary struct        */
    char *sheet;                         /* sheet structure         */
    char *env;                           /* environment variable    */
    char cid;                            /* chain identifier        */
    struct brookn *bn;                   /* brookn numbers          */
    struct br *bridges;                  /* beta sheet bridges      */
    Asd_Contact_Info **contact_info;     /* contacts structure read */
    Asd_Contact_Info **contact_rev_info; /* contacts structure read */
    Asd_Domain_List *d_list;             /* list of domains         */

    if ((env = getenv("DOMAKDIR")) == NULL) {
        printf("Error: the enviroment variable DOMAKDIR is undefined\n");
        printf("Set the variable to the path of the directory containing the files:\n");
        printf("dssp_files pdb_files contact_files.\n");
        printf("This is usually the def/ subdirectory in the domak installation directory.\n");
        return 2;
    } /*if*/

    if (argc < 2) {
        fprintf(stdout, "usage : domak -c<PDB code> (-d<chain> -p<parameters file> -o<outputfile>)\n");
        return 1;
    } /*if*/

    GJinitfile();
    output_file_name = asstr_save("output.res");
    got_code = 0;
    get_params = 0;
    cid = '*';                           /* '*' selects all residues */

    /* opterr = 0 and the leading ':' in the option string: getopt() stays
     * silent and reports a missing option argument as ':' so that the
     * messages below are the only ones printed. */
    opterr = 0;
    while ((opt = getopt(argc, argv, ":c:d:p:o:w")) != -1) {
        switch (opt) {
        case 'c':
            /* PDB code given; refuse a code that does not fit in code_id
             * instead of writing past the end of the buffer */
            if (strlen(optarg) >= sizeof code_id) {
                fprintf(stderr, "PDB code too long (maximum %d characters): %s\n",
                        (int)(sizeof code_id - 1), optarg);
                exit(-1);
            }
            strcpy(code_id, optarg);
            got_code = 1;
            break;
        case 'd':
            /* chain: only the first character of the argument is used */
            cid = optarg[0];
            break;
        case 'p':
            /* parameters file */
            parameters_file_name = asstr_save(optarg);
            get_params = 1;
            break;
        case 'o':
            /* output file */
            output_file_name = asstr_save(optarg);
            break;
        case ':':
            fprintf(stderr, "Missing argument\n");
            exit(-1);
        default:
            /* NOTE(review): 'w' is accepted by the option string but has no
             * case of its own, so it also ends up here - confirm intent. */
            fprintf(stderr, "Unrecognised option: %c\n", optopt);
            exit(-1);
            break;
        }
    } /*while*/

    if (got_code == 0) {
        fprintf(stderr, "No PDB code specified\n");
        fprintf(stderr, "usage : domak -c <PDB code> (-d <chain> -p <parameters file> -o <outputfile>)\n");
        return 2;
    } /*if*/

    fprintf(stdout, "# DOMAK running....\n");

    /* open and close the output file once before any work is done;
     * presumably GJfopen() with a final argument of 1 aborts when the file
     * cannot be opened - confirm against gjutil */
    test = GJfopen(output_file_name, "a", 1);
    fclose(test);

    /* built-in default parameters */
    params.MIN_DOMAIN_SIZE          = 40;
    params.MIN_SEGMENT_SIZE_END     =  5;
    params.MIN_SEGMENT_SIZE_MID     = 25;
    params.MIN_DOUBLE_SPLIT         =120;
    params.MIN_NO_CONTACT_CUTOFF_MID= 30;
    params.MIN_NO_CONTACT_CUTOFF_END= 10;
    params.E_WEIGHT                 =  0.1;

    params.MAX_ALLOWABLE_GLOB       =  2.85;

    params.MIN_PEAK_SS_ONLY_C       = 17.05;
    params.MIN_PEAK_C               =  9.5;
    params.MIN_PEAK_SS_ONLY_DC      = 17.05;
    params.MIN_PEAK_DC              =  9.5;
    params.MIN_PEAK_SS_ONLY_MC      = 17.05;
    params.MIN_PEAK_MC              =  9.5;

    params.MIN_PEAK_BLO_C           = 60.00;
    params.MIN_PEAK_SS_ONLY_BLO_C   = 60.00;
    params.MIN_PEAK_BLO_DC          = 60.00;
    params.MIN_PEAK_SS_ONLY_BLO_DC  = 60.00;

    params.MIN_SS_PER               = 0.57;

    params.MIN_HELIX_LENGTH         =  5;
    params.HELIX_RAMP               =  4;
    params.HELIX_REDUCE_C_DENS      = 10.32;

    params.INCREMENT_DIVIDER        =250;

/* get parameters */
    if (get_params) {
        test = GJfopen(parameters_file_name, "r", 1);
        asd_get_params(test);
        fclose(test);
    }
    else {
        fprintf(stdout, "# Using default parameters\n");
    } /*if*/
/* write out parameters */

    fprintf(stdout, "# MIN_DOMAIN_SIZE %d\n", params.MIN_DOMAIN_SIZE);
    fprintf(stdout, "# MIN_SEGMENT_SIZE_END %d\n", params.MIN_SEGMENT_SIZE_END);
    fprintf(stdout, "# MIN_SEGMENT_SIZE_MID %d\n", params.MIN_SEGMENT_SIZE_MID);
    fprintf(stdout, "# MIN_DOUBLE_SPLIT %d\n", params.MIN_DOUBLE_SPLIT);
    fprintf(stdout, "# MIN_NO_CONTACT_CUTOFF_MID %d\n", params.MIN_NO_CONTACT_CUTOFF_MID);
    fprintf(stdout, "# MIN_NO_CONTACT_CUTOFF_END %d\n", params.MIN_NO_CONTACT_CUTOFF_END);
    fprintf(stdout, "# E_WEIGHT %f\n", params.E_WEIGHT);
    fprintf(stdout, "# MAX_ALLOWABLE_GLOB %f\n", params.MAX_ALLOWABLE_GLOB);
    fprintf(stdout, "# MIN_PEAK_SS_ONLY_C %f\n", params.MIN_PEAK_SS_ONLY_C);
    fprintf(stdout, "# MIN_PEAK_C %f\n", params.MIN_PEAK_C);
    fprintf(stdout, "# MIN_PEAK_SS_ONLY_DC %f\n", params.MIN_PEAK_SS_ONLY_DC);
    fprintf(stdout, "# MIN_PEAK_DC %f\n", params.MIN_PEAK_DC);
    fprintf(stdout, "# MIN_PEAK_SS_ONLY_MC %f\n", params.MIN_PEAK_SS_ONLY_MC);
    fprintf(stdout, "# MIN_PEAK_MC %f\n", params.MIN_PEAK_MC);
    fprintf(stdout, "# MIN_PEAK_BLO_C %f\n", params.MIN_PEAK_BLO_C);
    fprintf(stdout, "# MIN_PEAK_SS_ONLY_BLO_C %f\n", params.MIN_PEAK_SS_ONLY_BLO_C);
    fprintf(stdout, "# MIN_PEAK_BLO_DC %f\n", params.MIN_PEAK_BLO_DC);
    fprintf(stdout, "# MIN_PEAK_SS_ONLY_BLO_DC %f\n", params.MIN_PEAK_SS_ONLY_BLO_DC);
    fprintf(stdout, "# MIN_SS_PER %f\n", params.MIN_SS_PER);
    fprintf(stdout, "# MIN_HELIX_LENGTH %d\n", params.MIN_HELIX_LENGTH);
    fprintf(stdout, "# HELIX_RAMP %d\n", params.HELIX_RAMP);
    fprintf(stdout, "# HELIX_REDUCE_C_DENS %f\n", params.HELIX_REDUCE_C_DENS);
    fprintf(stdout, "# INCREMENT_DIVIDER %d\n", params.INCREMENT_DIVIDER);

/* call function that reads and checks domains */

    code = asstr_cat_safe(asstr_save(code_id), asstr_save("-"));

    /* an empty name returned by RBR_getfile() is treated as "not found";
     * file_open_error() does not return */
    dssp_file_name = RBR_getfile(code_id,
                 asstr_cat_safe(asstr_save(env), "/dssp_files"), 4, stdout);
    if (dssp_file_name[0] == '\0')
        file_open_error(code_id,env,"/dssp_files");

    contacts_file_name = RBR_getfile(code_id,
                 asstr_cat_safe(asstr_save(env), "/contacts_files"), 4, stdout);
    if (contacts_file_name[0] == '\0')
        file_open_error(code_id,env,"/contacts_files");

    pdb_file_name = RBR_getfile(code_id,
                 asstr_cat_safe(asstr_save(env), "/pdb_files"), 4, stdout);

    if (pdb_file_name[0] == '\0')
        file_open_error(code_id,env,"/pdb_files");

    fprintf(stdout, "# pdb file : %s\n", pdb_file_name);
    fprintf(stdout, "# dssp file : %s\n", dssp_file_name);
    fprintf(stdout, "# contacts file : %s\n", contacts_file_name);

    /* open and close each input file once before the analysis starts */
    test = GJfopen(dssp_file_name, "r", 1);
    fclose(test);
    test = GJfopen(contacts_file_name, "r", 1);
    fclose(test);
    test = GJfopen(pdb_file_name, "r", 1);
    fclose(test);

    rasmol_file_name = asstr_cat_safe(asstr_save(code_id),
                                      asstr_save(".rasmol"));

    test = GJfopen(rasmol_file_name, "w", 1);
    fclose(test);

    fprintf(stdout, "# Reading in contacts file....\n");

    asd_read_all_info(contacts_file_name, dssp_file_name, &num_atoms,
                      &bn, &ss, &bridges, &sheet, &contact_info,
                      &contact_rev_info);
/*
 * ************ cid temporary argument in design stage
 */
    ass_set_code(code);
    ass_set_pdb_file(pdb_file_name);
    if (cid == '*') {
        fprintf(stdout, "# Analysing all residues in %s\n", code_id);
    } else {
        fprintf(stdout, "# Analysing residues in chain %c of %s\n",
                cid, code_id);
    } /*if*/
    d_list = asd_work_out_domains(num_atoms, contact_info,
                                  contact_rev_info, bn, cid, ss);

    /* first report goes to stdout only, the second one to the output file
     * (appended) and the rasmol file (rewritten) */
    asd_print_domains(d_list, bn, stdout, NULL, cid);
    output_fptr = GJfopen(output_file_name, "a", 1);
    rasmol_fptr = GJfopen(rasmol_file_name, "w", 1);
    asd_print_domains(d_list, bn, output_fptr, rasmol_fptr, cid);
    fclose(output_fptr);
    fclose(rasmol_fptr);

    free(bn);
    free(ss);
    free(bridges);
    free(sheet);
    /* NOTE(review): rows 0..num_atoms inclusive are released, i.e.
     * num_atoms + 1 entries; presumably this matches the allocation made in
     * asd_read_all_info() - confirm. */
    for (j = 0; j <= num_atoms; j++) {
        free(contact_info[j]);
        free(contact_rev_info[j]);
    } /*for*/
    free(contact_info);
    free(contact_rev_info);

    return 0;
} /*main*/

/*
 * Report on stderr that a data file could not be obtained for a PDB code,
 * then terminate the program with exit status 2.
 *
 *   code_id  PDB code the lookup was made for
 *   env      value of DOMAKDIR
 *   file     name of the directories file, appended to env in the message
 *
 * This function does not return.
 */
void file_open_error(char *code_id,char *env,char *file){
    FILE *err = stderr;

    /* what was being looked up, and where */
    fputs("file_open_error:\n", err);
    fprintf(err, "Error getting file for code %s\n", code_id);
    fprintf(err, "DOMAKDIR: %s\n", env);
    fprintf(err, "directories file: %s%s\n", env, file);

    /* possible causes */
    fputs("Either:\n", err);
    fputs("- the directories file was not found or\n", err);
    fputs("- the target file could not be found using the rules in the directories file.\n", err);
    fputs("-----\n", err);

    exit(2);
}
