C**********************************************************
      PROGRAM SORT
C----------------------------------------------------------
C Program to determine the optimal order for the vertices of
C a square matrix which exhibits a  similarity score
C
C Better known as ORDER.
C
C If you don't understand this comment then you shouldn't
C be reading it...    G.J.Barton 11/1986
C
C Reads the output file of the pairwise sequence comparison
C program 'BMULT'
C
C Includes option to call clust to perform single link
C cluster analysis and produce a dendrogram
C
C Version 2.0:  Unix compatible 8/12/87
C
C Version 2.1:  Includes option to write out a file 
C               in the correct format for the program ADDTREE
C
C Version 2.2:  Includes option to write out a cluster order file
C               and a full tree file describing the building of the clusters
C               These files may be used by AMULT to perform tree-based 
C               multiple alignment
C
C Version 2.3   Allows prolog terms to be extracted from the pairwise data.
C
C               
C Formats: 
C         ORDER_FILE    1x,i10,f20.2,i10
C                (index to sequence, score, sequence that gave the score)
C
C         TREE_FILE     1x,20i5
C                (this is confusing format.  The first number indicates the
C                number of sequences in the first half of the cluster joining
C                operation.  The numbers that follow are the indeces of the 
C                ORDERED sequences that make up that half of the cluster.
C                The next number indicates the number of sequences in the other
C                half of the next cluster and so on...
C
C Modified 16/May 1988:  To allow the output of NAS v SD in Kricket Graph
C                        tabbed column format.
C
C Modified 5 Oct 1988:   Now allows prolog pairs to be written and
C                        clusters above a particular cutoff.
C
C Modified 6 Oct 1988:   Permits multiple runs on the same data without
C                        having to re-read the data
C
C Modified 24th April 1989:  Now allows the data to be written straight out
C                            in prolog form.
C
C NOTE to programmers:  
C Changing MAXARR in this routine MUST be accompanied by changing
C          MACLUS in clust.blk to the same value - naff I know.
C
C sort_simple - simplified version just runs the normal defaults and nothing else.
C
C------------------------------------------------------------------------
C
C
C
      INTEGER  MAXDIM,MAXARR
      PARAMETER(maxarr = 500,maxdim=(maxarr*maxarr)/2)
      INTEGER  A(MAXDIM),B(MAXDIM),NUMA(MAXDIM),NUMB(MAXDIM),
     -         NGAPS(MAXDIM),NALIG(MAXDIM),NIDENT(MAXDIM),
     -         NRANS(MAXDIM),POS(MAXDIM)
C
      REAL     MATCH(MAXDIM),PIDENT(MAXDIM),NAS(MAXDIM),
     -         NASAL(MAXDIM),RMEAN(MAXDIM),STDEV(MAXDIM),
     -         RSCORE(MAXDIM)
C
      REAL     SCMAT(MAXARR,MAXARR),SCR(MAXDIM)
      INTEGER  MAXLEN,I,J,LINES,ORD(MAXDIM),LOC(MAXDIM),
     -         NUMSEQ,IMAXA,CHOICE,IOS,ndim,nn,EXCTOT,K,ii
C
      LOGICAL  ERROR,ablank,fexist,savec,savclu,first
C
      REAL XMAX,XMIN,YMAX,YMIN
      INTEGER ENDFIN,IFORM,JST,IEXC(MAXARR),conv(maxarr),
     -        cindx
C
C change inchar and justl to *7  from *5
      CHARACTER*131 LINE,inchar*7,ident(maxarr)*20,justl*7,
     -                   DETAIL(MAXARR)
C
      CHARACTER*39 FORM(2)
      data form/'(1X,4I5,F10.2,3I7,3F10.2,I7,3F10.2,I7/)',
     -          '(1x,4I5,F10.2,3I7,3F10.2,I7,3F10.2,I7) '/

      ERROR  = .FALSE.
      MAXLEN = MAXARR
      savclu = .false.
      CALL INITR(SCMAT,MAXLEN*MAXLEN,-10000.0)
C
C --  call routine to give banner and copyright notice.
C      call amess(6,1,'Program     O R D E R',1,'-')
C      call amess(6,1,'Processes MULTALIGN pairwise output',1,' ')
C      call amess(6,0,'Author: Geoff Barton',0,' ')
C
C      call local
C
C      write(6,6000)maxlen
6000  format(/1x,'Maximum Number of sequences allowed: ',I5/)
C      call amess(6,1,
C     -'!!NEW VERSION 17/2/92!!!! MAKES PostScript Trees!!',1,'!')
333   continue
C      CALL QMESS(6,5,1,'Enter pairwise filename: ',LINE)
      read(5,*)line
C
      OPEN(UNIT=1,FILE=LINE,STATUS='OLD',iostat=ios)
      if(ios.ne.0)then
        call amess(6,1,'Error: cannot open file',1,'e')
        inquire(file=line,exist=fexist)
        if(.not.fexist)then
          call amess(6,0,'File does not exist',1,'e')
        end if
        stop
      end if
C
C
C      CALL QMESS(6,5,1,'Does the input file have timings? (Def = N) :',
C     -line)
C      CALL CUCASE(LINE)
C      if(index(line,'Y').gt.0)then
C         iform = 1
C      else
C         iform = 2
C      end if
      iform = 2
C
C-    read file until 'I' found in correct column
      nn = 0
1235  CONTINUE
      READ(1,6010,iostat=ios)LINE
      if(ios.ne.0)then
        call amess(6,1,'Error reading from file unit=1',1,' ')
        stop
      end if
      if(index(line,'I    J ILEN JLEN').eq.0)then
         if(index(line,'>').gt.0)then
           nn = nn+1
           ident(nn)=line(index(line,'>'):(index(line,'>')+19))
           DETAIL(NN)=LINE
         end if
         goto 1235
      end if
6010  FORMAT(A131)
C
      I = 0
1     CONTINUE
      I = I+1
      READ(1,END=1000,IOSTAT=IOS,fmt=form(iform))
     -                   A(I),B(I),
     -                   NUMA(I),NUMB(I),MATCH(I),
     -                   NGAPS(I),NALIG(I),NIDENT(I),
     -                   PIDENT(I),NAS(I),NASAL(I),
     -                   NRANS(I),RMEAN(I),STDEV(I),
     -                   RSCORE(I),POS(I)
      IF(IOS.NE.0)THEN
C
C --    use end of file or error condition to exit (bit dangerous)
C
        GOTO 1000
      ELSE
        GOTO 1
      END IF
1000  CONTINUE
      LINES = I-1
C
C      call amess(6,1,'number of lines read in = '//inchar(lines),
C     -           1,'-')
C

C
C
      NUMSEQ = IMAXA(B,LINES)
C
C
C      CALL QMESS(6,5,1,'ENTER FILENAME FOR COMPARISONS TO EXCLUDE'//
C     -' [none]',LINE)
      line = ""
      IF(.NOT.ABLANK(LINE))THEN
        OPEN(UNIT=11,FILE=LINE,STATUS='OLD',IOSTAT=IOS)
        IF(IOS.NE.0)THEN
          inquire(file=line,exist=fexist)
          if(.not.fexist)then
            call amess(6,1,'File does not exist',1,'e')
          end if
          stop
        end if 
C
        I = 0
2231    CONTINUE
        READ(11,6010,IOSTAT=IOS)LINE
        IF(IOS.NE.0)GOTO 3244
        I = I+1
        CALL INTCOM(LINE,IEXC(I),ERROR)
            IF(ERROR)THEN
               EXCTOT=I-1
               GOTO 3244
            END IF
        GOTO 2231
3244    CONTINUE
        EXCTOT = I
C
C --    get rid of unwanted sequence lines
        DO 5546,I=1,EXCTOT
           CALL SCHAR(1,DETAIL(IEXC(I)))
5546    CONTINUE
        K=0
        DO 3332,I=1,NN
           IF(.NOT.ABLANK(DETAIL(I)))THEN
             K=K+1
             DETAIL(K)=DETAIL(I)(1:ENDFIN(DETAIL(I)))//INCHAR(K)
C            set index for new sequence number
             CONV(I)=K
           END IF
3332    CONTINUE
        NN = K
C
C --    get rid of all unwanted comparisons
        K = 0
        JST = 1
        DO 627,I=1,LINES
           DO 3325,J=JST,EXCTOT
              IF(A(I).EQ.IEXC(J).OR.
     -           B(I).EQ.IEXC(J))THEN
                 GOTO 627
              END IF
3325       CONTINUE
           K=K+1
           A(K)=conv(a(i))
           B(K)=conv(b(i))
           NUMA(K)=NUMA(I)
           NUMB(K)=NUMB(I)
           MATCH(K)=MATCH(I)
           NGAPS(K)=NGAPS(I)
           NALIG(K)=NALIG(I)
           NIDENT(K)=NIDENT(I)
           PIDENT(K)=PIDENT(I)
           NAS(K) = NAS(I)
           NASAL(K)=NASAL(I)
           NRANS(K)=NRANS(I)
           RMEAN(K)=RMEAN(I)
           STDEV(K)=STDEV(I)
           RSCORE(K)=RSCORE(I)
           POS(K)=POS(I)
627     CONTINUE
        LINES = K
        NUMSEQ = NN
C
C
C --    write out the new list of sequences
        CALL QMESS(6,5,1,'ENTER FILENAME FOR NEW LIST ',LINE)
        OPEN(UNIT=21,FILE=LINE,STATUS='NEW',
     -  iostat=ios)
        if(ios.ne.0)then
          call amess(6,1,'Error opening file',1,'e')
          inquire(file=line,exist=fexist)
          if(fexist)then
            call amess(6,1,'File already exists',1,'e')
          end if
          stop
        end if
        DO 1011,I=1,NUMSEQ
           WRITE(21,2012)DETAIL(I)
2012       FORMAT(A132)
1011    CONTINUE
        WRITE(21,2222)'     I    J ILEN JLEN     MATCH  NGAPS  NALIG'//
     -  ' NIDENT    %IDENT       NAS     NASAL  NRANS     RMEAN'//
     -  '     STDEV     SCORE'
2222    FORMAT(A)
        DO 2314,I=1,LINES
           WRITE(21,fmt=form(2))
     -                   A(I),B(I),
     -                   NUMA(I),NUMB(I),MATCH(I),
     -                   NGAPS(I),NALIG(I),NIDENT(I),
     -                   PIDENT(I),NAS(I),NASAL(I),
     -                   NRANS(I),RMEAN(I),STDEV(I),
     -                   RSCORE(I),POS(I)
2314   CONTINUE
      END IF
C
C -- option to write out the file in prolog form
C
C      call qmess(6,5,1,'Write out prolog format file [N]: ',line)
      line = "N"
      if(cindx(line,'Y').gt.0)then
         call qmess(6,5,1,'Enter file for Prolog output: ',line)
         open(unit=45,file=line,status='new',iostat=ios)
         if(ios.ne.0)then
            call amess(6,1,'Error opening file: '//line,1,'e')
            stop
         end if
         do 4517,i=1,nn
            write(45,4514)ident(i)(:endfin(ident(i)))
4517     continue
4514     format('id(',a,').')
         do 4511,i=1,lines
            write(45,4513)
     -                   A(I),B(I),
     -                   NUMA(I),NUMB(I),MATCH(I),
     -                   NGAPS(I),NALIG(I),NIDENT(I),
     -                   PIDENT(I),NAS(I),NASAL(I),
     -                   NRANS(I),RMEAN(I),STDEV(I),
     -                   RSCORE(I),POS(I)
4511     continue
4513     format('o('i5,',',i5,',',i5,',',i5,',',f10.2,
     -             ',',i7,',',i7,',',i7,',',
     -             f10.2,',',f10.2,',',f10.2,',',i7,',',
     -             f10.2,',',f10.2,',',f10.2,',',i7,').')
         call amess(6,1,'Prolog file written',1,' ')
      end if
C
C what do you want to order on??
C
C --start of main loop 
C
C     flag to indicate that this is the first time round
      first = .true.
      savec = .false.
      savclu = .false.
6666  continue
C
C      CALL AMESS(6,1,
C     -'Options: NGAPS(1),PIDENT(2),NAS(3),NASAL(4),RSCORE(5) ?',
C     -0,' ')
C      CALL QMESS(6,5,1,'Enter choice :',LINE)
      line = "3"
      CALL INTCOM(LINE,CHOICE,ERROR)
      IF(ERROR.OR.CHOICE.GT.5.OR.CHOICE.LT.1)then
        call amess(6,1,'Unrecognised option',1,' ')
        goto 7777
      end if
C
C put the relevent columns into the SCMAT
C Yes I know this is a horrible bit of code, but I can't be
C bothered rewriting the bit above at the moment.....
C
      DO 10,I=1,LINES
         IF(CHOICE.EQ.1)THEN
           SCMAT(A(I),B(I))=NGAPS(I)
         ELSE IF (CHOICE.EQ.2)THEN
           SCMAT(A(I),B(I))=PIDENT(I)
         ELSE IF (CHOICE.EQ.3)THEN
           SCMAT(A(I),B(I))=NAS(I)
         ELSE IF (CHOICE.EQ.4)THEN
           SCMAT(A(I),B(I))=NASAL(I)
         ELSE IF (CHOICE.EQ.5)THEN
           SCMAT(A(I),B(I))=RSCORE(I)
         END IF
10    CONTINUE
C
C --  redefine the identifiers for plotting
      if(first)then
      do 3521,i=1,numseq
         call schar(1,ident(i))
         if(cindx(detail(i),'>p1;').gt.0)then
          ident(i)=
     -    detail(i)((index(detail(i),'>')+4):(index(detail(i),'>')+19))
         else
          ident(i)=
     -    detail(i)((index(detail(i),'>')+1):(index(detail(i),'>')+19))
         end if
3521  continue
      end if
C
C
C      call amess(6,1,'Number items compared '//inchar(numseq),
C     -           1,'-')
C      call amess(6,0,'Number of identifiers '//inchar(nn),
C     -           1,'-')
      if(nn.ne.numseq)then
        call amess(6,1,'Headers don''t match seqs!!!!!',1,'e')
        stop
      end if

C      DO 33, I=1,NUMSEQ
C        WRITE(6,601)(SCMAT(I,J),J=1,NUMSEQ)
C33    CONTINUE
C
C
C601   FORMAT(1X,20F10.1)
C
C      call qmess(6,5,1,'Produce an order file? [Y] ',
C     - line)
C      call cucase(line)
      line = "N"
      if(index(line,'N').eq.0)then
         CALL QMESS(6,5,1,'Enter filename for order ',LINE)
         OPEN(UNIT=7,STATUS='NEW',FILE=LINE,iostat=ios)
         if(ios.ne.0)then
           call amess(6,1,'error opening file',1,'e')
           inquire(file=line,exist=fexist)
           if(fexist)then
             call amess(6,1,'File already exists',1,' ')
           end if
           goto 7777
         end if
         CALL ORDER(SCMAT,MAXLEN,NUMSEQ,A,B,ORD,SCR,LOC)
         DO 34,I=1,NUMSEQ
            WRITE(7,602)ORD(I),SCR(I),LOC(I)
34       CONTINUE
602      FORMAT(1X,I10,F20.2,I10)
         call amess(6,1,'Order file written',1,' ')
         close(7)
C
C --     do we want to continue?
         goto 7777
      end if
C
C --  cluster analysis option
C      call qmess(6,5,1,'Perform cluster analysis? [Y] ',line)
C      call cucase(line)
      line = "Y"
      if(index(line,'N').eq.0)then
C       reflect the matrix values
        do 856,i=2,numseq
           ii=i-1
           do 856,j=1,ii
             scmat(i,j)=scmat(j,i)
856     continue
C        do 888,i=1,numseq
C           write(6,6661)(scmat(i,j),j=1,numseq)
C888     continue
C6661    format(1x,10f5.0)
C
C
C --    option to save the cluster details to an order file and tree file
C        call qmess(6,5,1,'Save full cluster details? [Y] ',line)
C        call cucase(line)
        line = "Y"
        if(index(line,'N').eq.0)then
           savec=.true.
142        continue
C           call qmess(6,5,1,'Enter file to save cluster order: ',line)
           read(5,*)line
           open(unit=72,file=line,status='new',iostat=ios)
           if(ios.ne.0)then
             call amess(6,1,'Error: cannot open file',1,'e')
             goto 142
           end if
143        continue
           call qmess(6,5,1,'Enter file to save tree details: ',line)
           open(unit=73,file=line,status='new',iostat=ios)
           if(ios.ne.0)then
              call amess(6,1,'Error: cannot open file',1,'e')
              goto 143
           end if
           call clust(scmat,maxlen,numseq,ident,savec,savclu)
           call amess(6,1,'Cluster analysis complete ',1,'-')
           close(72)
           close(73)
           goto 7777
        end if
C
C --    option to write out prolog form of clusters .gt. cutoff
        call qmess(6,5,1,'Write cluster identifiers [Y]: ',line)
        call cucase(line)
        if(index(line,'N').eq.0)then
           savclu = .true.
1422       continue
           call qmess(6,5,1,'Enter filename for save: ',line)
           open(unit=7,file=line,status='new',iostat=ios)
           if(ios.ne.0)then
             call amess(6,1,'Error: cannot open file',1,'e')
             goto 1422
           end if
           do 9152, i = 1,numseq
              call clcase(ident(i))
9152       continue
        end if
        call clust(scmat,maxlen,numseq,ident,savec,savclu)
        call amess(6,1,'Cluster analysis complete ',1,'-')
        close(7)
        goto 7777
      end if
C
C --  addtree file generation
      call qmess(6,5,1,'Produce an ADDTREE file? [Y] ',
     - line)
      call cucase(line)
      if(index(line,'N').eq.0)then
         CALL QMESS(6,5,1,'Enter filename for addtree ',LINE)
         OPEN(UNIT=7,STATUS='NEW',FILE=LINE,iostat=ios)
         if(ios.ne.0)then
           call amess(6,1,'error opening file',1,'e')
           inquire(file=line,exist=fexist)
           if(fexist)then
             call amess(6,1,'File already exists',1,' ')
           end if
           goto 7777
         end if
         write(7,778)numseq
         do 331,j=2,numseq
            ii = j-1
            do 332, i=1,ii
               write(7,779)scmat(i,j)
332         continue
331      continue
C
         do 391,i=1,numseq
            write(7,780)i,ident(i)
391      continue
C
778      format(1x,i5)
779      format(1x,f7.2)
780      format(1x,i3,a)
         close(7)
         goto 7777
      end if
C
C --Kricket graph NAS V SD file
      call qmess(6,5,1,'Produce NAS v SDscore file [Y]',
     -line)
      call cucase(line)
      if(index(line,'N').eq.0)then
        call qmess(6,5,1,'Enter filename for plot ',line)
        OPEN(UNIT=7,STATUS='NEW',FILE=LINE,iostat=ios)
        if(ios.ne.0)then
           call amess(6,1,'error opening file',1,'e')
           inquire(file=line,exist=fexist)
           if(fexist)then
             call amess(6,1,'File already exists',1,' ')
           end if
           goto 7777
        end if
C
        do 781,i=1,lines
           write(7,775)a(i),char(9),ident(a(i)),char(9),
     -                 b(i),char(9),ident(b(i)),char(9),
     -                 nas(i),char(9),rscore(i)
775        format(i5,a1,a20,a1,
     -            i5,a1,a20,a1,
     -            f8.2,a1,f8.2)
781     continue
        close(7)
        goto 7777
      end if
C
C --option to produce prolog terms
      call qmess(6,5,1,'Generate prolog terms? [Y] ',line)
      call cucase(line)
      if(index(line,'N').eq.0)then
        call qmess(6,5,1,'Enter filename for prolog ',line)
        OPEN(UNIT=7,STATUS='NEW',FILE=LINE,iostat=ios)
        if(ios.ne.0)then
           call amess(6,1,'error opening file',1,'e')
           goto 7777
        end if
C       write out the prolog clauses
        do 8310,i=1,lines
C           write(7,8311)ident(a(i))(:endfin(ident(a(i)))),
C     -                  ident(b(i))(:endfin(ident(b(i)))),
C     -                             scmat(a(i),b(i))
C8311       format('p(''',a,''',''',a,''','f6.2,').')
C
C --just write out the i,j and scmat values
            write(7,8321)a(i),b(i),scmat(a(i),b(i))
8321        format(2i7,f10.2)
8310    continue
        close(7)
        goto 7777
      end if
C
7777  continue
      call qmess(6,5,1,'Re-analyze the data? [Y] ',line)
      call cucase(line)
      if(index(line,'N').eq.0)then
         first = .false.
         goto 6666
      end if
C
      call amess(6,1,'Program ORDER ends',1,' ')
      stop
C
      END
C
