/******************************************************************************
 The computer software and associated documentation called DOMAK hereinafter
 referred to as the WORK which is more particularly identified and described in
 Appendix A of the file LICENSE.  Conditions and restrictions for use of
 this package are also in this file.

 This routine was written by Asim S. Siddiqui

 The WORK is Copyright (1995) A. S. Siddiqui and G. J. Barton

 All use of the WORK must cite:
 Siddiqui, A. S. and Barton, G. J., "Continuous and Discontinuous Domains: An
 Algorithm for the Automatic Generation of Reliable Protein Domain Definitions" 
 PROTEIN SCIENCE, 4:872-884 (1995).
*****************************************************************************/

/*
 * Title
 *    asd_contacts_utils.c
 * Purpose
 *    to provide a set of utilities for interacting with the contacts
 *    routines
 * Author
 *    Asim Siddiqui
 */

#include <stdio.h>
#include <string.h>
#include <rdssp.h>
#include <gjutil.h>
#include <asm_mop.h>
#include <ase_error.h>
#include <asd_structs.h>
#include <asd_contacts_utils.h>
#include <rbr_newlib.h>

extern Asd_Parameters params;

void
asd_read_all_info(char *contacts_file_name, char *dssp_file_name,
                  int *real_num_atoms, struct brookn **contacts_bn,
                  char **contacts_ss, struct br **contacts_bridges,
                  char **contacts_sheet, Asd_Contact_Info ***contacts_info,
                  Asd_Contact_Info ***contacts_rev_info)
{
    FILE *contacts_fptr;        /* file pointer                    */
    int end;                    /* end of file reached             */
    int flag;                   /* flag returned from file         */
    int binary;                 /* whether file is binary          */
    int i;                      /* loop counter                    */
    int j;                      /* loop counter                    */
    int k, l;                   /* loop counters                   */
    int k2;                     /* loop counters                   */
    int con_num;                /* contacts number                 */
    int b1, b2;                 /* brookn residue number           */
    int num1, num2;             /* atom number                     */
    int c1 , c2;                /* contacts residue number         */
    int num_atoms;              /* dssp number of atoms            */
    int num_atoms2;             /* dssp number of atoms            */
    int num_contacts;           /* number of contacts              */
    char temp[120], temp2[120]; /* temporary buffers               */
    char buff[2][10];           /* atom type buffer                */
    char res1[10], res2[10];    /* residue name                    */
    char s1, s2;                /* secondary structure             */
    char cx;                    /* contact type                    */
    char extra[6];              /* store chain and insertion codes */
    char *ss;                   /* dssp secondary structure        */
    char *aa;                   /* dssp amino acid sequence        */
    char *sheet;                /* dssp sheet structure            */
    float dist;                 /* contacts dist                   */
    struct br *bridges;         /* dssp bridge structure           */
    struct brookn *bn;          /* dssp brookn structure           */

/* read dssp file checking for error in read */
    ss = get_ss(dssp_file_name, &num_atoms);
    aa = get_aa(dssp_file_name, &num_atoms2);
    if (num_atoms != num_atoms2) {
        ase_error_fatal("asd_read_all_info",
                        "error number of atoms do not match up");
    } /*if*/
    bn = get_bn(dssp_file_name, &num_atoms2);
    if (num_atoms != num_atoms2) {
        ase_error_fatal("asd_read_all_info",
                        "error number of atoms do not match up");
    } /*if*/
    bridges = get_bridge(dssp_file_name, &num_atoms2);
    if (num_atoms != num_atoms2) {
        ase_error_fatal("asd_read_all_info",
                        "error number of atoms do not match up");
    } /*if*/
    sheet = get_sheet(dssp_file_name, &num_atoms2);
    if (num_atoms != num_atoms2) {
        ase_error_fatal("asd_read_all_info",
                        "error number of atoms do not match up");
    } /*if*/

/* read in number of atoms from contacts file */
    *real_num_atoms = 0;

    if (strcmp(&contacts_file_name[strlen(contacts_file_name)-2],".b")==0) {
        binary = 1;
        contacts_fptr = GJfopen(contacts_file_name, "rb", 1);
    } else {
        binary = 0;
        contacts_fptr = GJfopen(contacts_file_name, "r", 1);
    } /*if*/

    end = 0;
    while (!end) {
        if (binary) {
            flag = RBR_readcbin(temp, contacts_fptr, stdout);
            if (flag == -1) {
                ase_error_fatal("asd_read_all_info", "error reading contacts");
            } else if (flag == 1) {
                end = 1;
            } /*if*/
        } else {
            if (fgets(temp, 100, contacts_fptr) == NULL) {
                end = 1;
            } /*if*/
        } /*if*/

        if (end) break;

        sscanf(temp, "%d %s %s", &con_num, &res1, &temp2);
        if (strcmp(res1, "HET") != 0) {
            sscanf(temp,
         "%d %s %4d %c %4d %c %s %5d %c  %s %4d %c %4d %c %s %5d %c  %c %f %s",
         &con_num, &res1, &c1, &extra[0], &b1, &extra[1], &buff[0], &num1, &s1,
         &res2, &c2, &extra[2], &b2, &extra[3], &buff[1], &num2,
         &s2, &cx, &dist, &temp2);
            if (c1 > *real_num_atoms) {
                *real_num_atoms = c1;
            } /*if*/
            if (c2 > *real_num_atoms) {
                *real_num_atoms = c2;
            } /*if*/
        } /*if*/
    } /*while*/
    fclose(contacts_fptr);

/* allocate memory for structures */
    *contacts_bn = (struct brookn *) asm_malloc(sizeof(struct brookn) *
                                               (*real_num_atoms));
    *contacts_ss = (char *) asm_malloc(sizeof(char) *
                                               (*real_num_atoms));
    *contacts_bridges = (struct br *) asm_malloc(sizeof(struct br) *
                                               (*real_num_atoms));
    *contacts_sheet = (char *) asm_malloc(sizeof(char) *
                                               (*real_num_atoms));
/* I add one to size of contacts info so that in assigning contacts I don't
 * have to subtract one from the contact residue number which are numbered
 * from one
 */
    *contacts_info = (Asd_Contact_Info **)
                               asm_malloc(sizeof(Asd_Contact_Info *) *
                                               (*real_num_atoms + 1));
    *contacts_rev_info = (Asd_Contact_Info **)
                               asm_malloc(sizeof(Asd_Contact_Info *) *
                                               (*real_num_atoms + 1));

/* initialise all positions */
    i = 0;
    while (i < *real_num_atoms) {
        (*contacts_ss)[i] = ' ';
        (*contacts_bridges)[i].one = 0;
        (*contacts_bridges)[i].two = 0;
        (*contacts_sheet)[i] = ' ';
        (*contacts_bn)[i].cid = ' ';
        (*contacts_bn)[i].in = ' ';
        (*contacts_bn)[i].n = -100000;
        i++;
    } /*while*/

    i = 0;
    while (i <= *real_num_atoms) {
        (*contacts_info)[i] = (Asd_Contact_Info *)
                               asm_malloc(sizeof(Asd_Contact_Info));
        (*contacts_info)[i][0].res_num = 0;
        (*contacts_rev_info)[i] = (Asd_Contact_Info *)
                               asm_malloc(sizeof(Asd_Contact_Info));
        (*contacts_rev_info)[i][0].res_num = 0;
        i++;
    } /*while*/

/* now read in all info from contacts file */
    if (binary) {
        contacts_fptr = GJfopen(contacts_file_name, "rb", 1);
    } else {
        contacts_fptr = GJfopen(contacts_file_name, "r", 1);
    } /*if*/

    end = 0;
    while (!end) {
        if (binary) {
            flag = RBR_readcbin(temp, contacts_fptr, stdout);
            if (flag == -1) {
                ase_error_fatal("asd_read_all_info", "error reading contacts");
            } else if (flag == 1) {
                end = 1;
            } /*if*/
        } else {
            if (fgets(temp, 100, contacts_fptr) == NULL) {
                end = 1;
            } /*if*/
        } /*if*/

        if (end) break;

        sscanf(temp, "%d %s %s", &con_num, &res1, &temp2);
        if (strcmp(res1, "HET") != 0) {
            sscanf(temp,
         "%d %s %4d %c %4d %c %s %5d %c  %s %4d %c %4d %c %s %5d %c  %c %f %s",
         &con_num, &res1, &c1, &extra[0], &b1, &extra[1], &buff[0], &num1, &s1,
         &res2, &c2, &extra[2], &b2, &extra[3], &buff[1], &num2,
         &s2, &cx, &dist, &temp2);
/* sort out brookn chain, numbering and insertion codes */
            (*contacts_bn)[c1 - 1].n = b1;
            if (extra[0] != '_') {
                (*contacts_bn)[c1 - 1].cid = extra[0];
            } /*if*/
            if (extra[1] != '_') {
                (*contacts_bn)[c1 - 1].in = extra[1];
            } /*if*/
            (*contacts_bn)[c2 - 1].n = b2;
            if (extra[2] != '_') {
                (*contacts_bn)[c2 - 1].cid = extra[2];
            } /*if*/
            if (extra[3] != '_') {
                (*contacts_bn)[c2 - 1].in = extra[3];
            } /*if*/
/* read in contacts info */
            num_contacts = (*contacts_info)[c1][0].res_num;
            num_contacts++;
            (*contacts_info)[c1] = (Asd_Contact_Info *) asm_realloc(
            (*contacts_info)[c1], sizeof(Asd_Contact_Info)*(num_contacts + 1));
            (*contacts_info)[c1][0].res_num = num_contacts;
            (*contacts_info)[c1][num_contacts].res_num = c2;
            (*contacts_info)[c1][num_contacts].ctype = cx;
/* make reverse list */
            num_contacts = (*contacts_rev_info)[c2][0].res_num;
            num_contacts++;
            (*contacts_rev_info)[c2] = (Asd_Contact_Info *) asm_realloc(
            (*contacts_rev_info)[c2],
            sizeof(Asd_Contact_Info)*(num_contacts + 1));
            (*contacts_rev_info)[c2][0].res_num = num_contacts;
            (*contacts_rev_info)[c2][num_contacts].res_num = c1;
            (*contacts_rev_info)[c2][num_contacts].ctype = cx;
        } /*if*/
    } /*while*/

/* now match all dssp numbers and info to contacts numbers */
/* speed ups here possible */
    i = 0;
    while (i < num_atoms) {
        if (aa[i] != '!') {
            j = 0;
            while (j < *real_num_atoms && (bn[i].cid != contacts_bn[0][j].cid ||
                   bn[i].n != contacts_bn[0][j].n || bn[i].in !=
                   contacts_bn[0][j].in)) {
                j++;
            } /*while*/
            if (j == *real_num_atoms) {
                ase_error_warn("cant match up dssp and contacts");
            } else {
                contacts_ss[0][j] = ss[i];
                contacts_sheet[0][j] = sheet[i];
            } /*if*/
        } /*if*/
/* sort out bridges */
/* only need next highest connection */
        if (ss[i] == 'E') {
            if (bridges[i].two == 0) {
                k = bridges[i].one;
                k2 = 0;
            } else if (bridges[i].one == 0) {
                k = 0;
                k2 = bridges[i].two;
            } else {
                k = bridges[i].one;
                k2 = bridges[i].two;
            } /*if*/
            if (k != 0) {
                l = 0;
                while (l < *real_num_atoms && (bn[k].cid !=
                      contacts_bn[0][l].cid || bn[k].n != contacts_bn[0][l].n
                       || bn[k].in != contacts_bn[0][l].in)) {
                    l++;
                } /*while*/
                if (l == *real_num_atoms) {
                    ase_error_warn("cant match up dssp and contacts");
                } else {
                    contacts_bridges[0][j].one = l;
                } /*if*/
            } /*if*/
            if (k2 != 0) {
                l = 0;
                while (l < *real_num_atoms && (bn[k2].cid !=
                      contacts_bn[0][l].cid || bn[k2].n != contacts_bn[0][l].n
                       || bn[k2].in != contacts_bn[0][l].in)) {
                    l++;
                } /*while*/
                if (l == *real_num_atoms) {
                    ase_error_warn("cant match up dssp and contacts");
                } else {
                    contacts_bridges[0][j].two = l;
                } /*if*/
            } /*if*/
        } /*if*/
        i++;
    } /*while*/

/* make assumption in the case of missing atoms that they have the same aspects
   of the next one in the file */
   i = (*real_num_atoms - 1);
   while (i >= 0) {
       if (contacts_bn[0][i].n == -100000) {
           if (i != (*real_num_atoms - 1)) {
               contacts_bn[0][i].n = contacts_bn[0][i + 1].n;
               contacts_bn[0][i].cid = contacts_bn[0][i + 1].cid;
               contacts_bn[0][i].in = contacts_bn[0][i + 1].in;
           } else {
               contacts_bn[0][i].n = contacts_bn[0][i - 1].n;
               contacts_bn[0][i].cid = contacts_bn[0][i - 1].cid;
               contacts_bn[0][i].in = contacts_bn[0][i - 1].in;
           } /*if*/
        } /*if*/
        i--;
    } /*while*/

/* now add sheet contacts */
    i = 1;
    while (i <= *real_num_atoms) {
        if (contacts_ss[0][i - 1] == 'E' &&
            contacts_bridges[0][i - 1].one != 0) {
            c2 = contacts_bridges[0][i - 1].one;
            if (c2 > i) {
                num_contacts = (*contacts_info)[i][0].res_num;
                num_contacts++;
                (*contacts_info)[i] = (Asd_Contact_Info *) asm_realloc(
                (*contacts_info)[i],
                sizeof(Asd_Contact_Info)*(num_contacts + 1));
                (*contacts_info)[i][0].res_num = num_contacts;
                (*contacts_info)[i][num_contacts].res_num = c2;
                (*contacts_info)[i][num_contacts].ctype = 'E';
            } else {
/* sort out reverse contacts */
                num_contacts = (*contacts_rev_info)[c2][0].res_num;
                num_contacts++;
                (*contacts_rev_info)[c2] = (Asd_Contact_Info *) asm_realloc(
                (*contacts_rev_info)[c2],
                sizeof(Asd_Contact_Info)*(num_contacts + 1));
                (*contacts_rev_info)[c2][0].res_num = num_contacts;
                (*contacts_rev_info)[c2][num_contacts].res_num = i;
                (*contacts_rev_info)[c2][num_contacts].ctype = 'E';
            } /*if*/
        } /*if*/
        if (contacts_ss[0][i - 1] == 'E' &&
            contacts_bridges[0][i - 1].two != 0) {
            c2 = contacts_bridges[0][i - 1].two;
            if (c2 > i) {
                num_contacts = (*contacts_info)[i][0].res_num;
                num_contacts++;
                (*contacts_info)[i] = (Asd_Contact_Info *) asm_realloc(
                (*contacts_info)[i],
                sizeof(Asd_Contact_Info)*(num_contacts + 1));
                (*contacts_info)[i][0].res_num = num_contacts;
                (*contacts_info)[i][num_contacts].res_num = c2;
                (*contacts_info)[i][num_contacts].ctype = 'E';
/* sort out reverse contacts */
            } else {
                num_contacts = (*contacts_rev_info)[c2][0].res_num;
                num_contacts++;
                (*contacts_rev_info)[c2] = (Asd_Contact_Info *) asm_realloc(
                (*contacts_rev_info)[c2],
                sizeof(Asd_Contact_Info)*(num_contacts + 1));
                (*contacts_rev_info)[c2][0].res_num = num_contacts;
                (*contacts_rev_info)[c2][num_contacts].res_num = i;
                (*contacts_rev_info)[c2][num_contacts].ctype = 'E';
            } /*if*/
        } /*if*/
        i++;
    } /*while*/


/* free unused structures */
    free(aa);
    free(ss);
    free(bridges);
    free(sheet);
    free(bn);
} /*asd_read_all_info*/
