/* uniqmerge.c

Version:   1.0.2  19-MAY-2003
Author:    David Mathog, Biology Division, Caltech
email:     mathog@caltech.edu
Copyright: 2003 David Mathog and California Institute of Technology (Caltech)


Description:
   This program processes several input files and emits lines
   which appear >=N, or <=N, default is <=1.
   This program merges up to FOPEN_MAX - 1 (typically at least 19)
   odd files.
   Files may be marked with tags so that the merge operation
   occurs in blocks.
   
   Most of the command line options are similar to the "uniq"
   program.


Compilation:
  This code is known to compile cleanly with no warnings or errors.

  % gcc -Wall -ansi -pedantic  -o uniqmerge uniqmerge.c
 
  With the Solaris Forte compiler:

  % cc -Xc  -o uniqmerge uniqmerge.c
  
  add -DMAXINFILE=50 (or whatever) to override default
  Ignore warnings about "long long".
  
License terms:
    You may run this program on any platform. You may
    redistribute the source code of this program subject to
    the condition that you do not first modify it in any way.
    You may  distribute binary versions of this program so long
    as they were compiled from unmodified source code.  There
    is no charge for using this software.  You may not charge
    others for the use of this software.

Changes:

Version:   1.0.2  19-MAY-2003  Small tweak in the sort routine.
Version:   1.0.1  18-MAY-2003  Terrible bug - malloc's were accidentally placed
                     BEFORE the command line processing.  Amazingly enough this
                     worked on linux with gcc, but nowhere else.
Version:   1.0    01-MAY-2003  Initial release


Bug reports:  Please report bugs via email.



*/

#include <stdlib.h>
#include <stdio.h>
#include <strings.h>
#include <string.h> /* for strstr */
#include <ctype.h>  /* for toupper */
#include <unistd.h>

#define BMVERSION   "1.0.1  18-MAY-2003"

#define DEFSTACK    32000
#define MYMAXSTRING 16384
#ifndef MAXINFILE
#define MAXINFILE   (FOPEN_MAX-1)  /* because need 1 open for output file */
#endif  /* MAXINFILE */
#define MAXHINTS    65536
#define MTBUF       32     /* length of format buffer for padding out long long with commas */

#define NO          0
#define YES         1
#define COUNTYES    1
#define SOURCEYES   2
#define TAGYES      4

#define SORTSA      1      /* string  ascending */
#define SORTSD      2      /* string  descending */
#define SORTNA      3      /* numeric ascending */
#define SORTND      4      /* numeric descending*/

#define LE          1
#define GE          2

#define MYSWAP(A,B,C) A=B; B=C; C=A;

/* triggers for state transitions */


/* function prototypes in alphabetical order */

void  emit_help(void);
void  empty_stack(char **str_stack,int stacksize,int *stackptr);
void  insane(char *string);
int   lcl_strcasecmp(char *s1, char *s2);
int   lcl_strcmprange(char *s1, char *s2, int start, int end, int usecase);
void  merge_stack(char **str_stack,int *int_stack,int *add_stack,double *d_stack,
                int stacksize, int frontptr, int *stackptr,
                int sorttype,int usecase,int sc,int ec, int nc);
void  process_command_line_args(int argc,char **argv, char **tstring,
       int *lcount, int *usecase, int *sc, int *ec, int *nc,
       int *direction, int *replicas, int *sorttype,
       int *stacksize, int *ninfiles, FILE **outfile);
void  push(int from,char **str_stack,int *int_stack,
           int *add_stack,int stacksize,int *stackptr,char *string);
char *read_line(char *buffer,FILE *infile);
void  setiposnumeric(int *val,int *numarg,int argc,char **argv,char * label);
void  setsorttype(int *val,int *numarg,int argc,char **argv,char * label);
void  sort_stack(char **str_stack,int *int_stack,int *add_stack,double *d_stack,
                int frontptr, int *stackptr,int sorttype,int usecase,int sc,int ec, int nc);
void  stack_dump(FILE *outfile, char *tstring, int tag,
          int direction, int replicas, int lcount,
          char **str_stack,int *int_stack,int *add_stack, int stackptr);

/* globals */

FILE *infiles[MAXINFILE];  /* file pointers for input files */
char *buffers[MAXINFILE];

void setiposnumeric(int *val,int *numarg,int argc,char **argv,char * label){
      (*numarg)++;
      if( ( *numarg >= argc ) || (argv[*numarg] == NULL)){
        (void) fprintf( stderr, "%s: missing argument\n",label);
        exit(EXIT_FAILURE);
      }
      if(sscanf(argv[*numarg],"%d",val) != 1){
        (void) fprintf(stderr,"Bad integer argument/parameter [%s %s] \n",label,argv[*numarg]);
        exit(EXIT_FAILURE);
      }
      if(*val <= 0){
        (void) fprintf(stderr,"Illegal nonpositive integer argument/parameter [%s %s] \n",label,argv[*numarg]);
        exit(EXIT_FAILURE);
      }
}

void setsorttype(int *sorttype,int *numarg,int argc,char **argv,char * label){
      (*numarg)++;
      if( ( *numarg >= argc ) || (argv[*numarg] == NULL)){
        (void) fprintf( stderr, "%s: missing argument\n",label);
        exit(EXIT_FAILURE);
      }
      if(lcl_strcasecmp(argv[*numarg],"s+")==0){        *sorttype=SORTSA; }
      else if(lcl_strcasecmp(argv[*numarg],"s-")==0){   *sorttype=SORTSD; }
      else if(lcl_strcasecmp(argv[*numarg],"n+")==0){   *sorttype=SORTNA; }
      else if(lcl_strcasecmp(argv[*numarg],"n-")==0){   *sorttype=SORTNA; }
      else{  insane("uniqmerge: fatal error: unknown sort type"); }
}

/* reads one line from an input file into buffer up to N chars long
   Truncation is a fatal error.  End of file (NULL) is ok, and NULL
   is returned */
   
char *read_line(char *buffer,FILE *infile){
  char *newline;
    if(fgets(buffer,MYMAXSTRING-1,infile) == NULL){
      return NULL;
    }
    newline=strstr(buffer,"\n");
    if(newline == NULL){   /* string truncated, this is fatal in this program */
      insane("uniqmerge: fatal error: input line was too long");
    }
    *newline='\0';                        /* replace the \n with a terminator */
    return buffer;
} /* end of read_line */

int  lcl_strcasecmp(char *s1, char *s2){
int c1;
int c2;
  for(; ;s1++,s2++){
    c1=toupper(*s1);
    c2=toupper(*s2);
    if(c1 < c2)return -1;
    if(c1 > c2)return  1;
    if(c1 == 0)return  0;  /*c2 also is 0 in this case */
  }
}


/* compare between start and end only. Have to be careful about
   sizes here because one or other or both could terminate before
   the comparison region 
   
   returns:
   1 if s1> s2
   0 if s1==s2
  -1 if s1< s2
  -2 if both terminated before comparison range
  
*/
int  lcl_strcmprange(char *s1, char *s2, int start, int end, int usecase){
int c1,ok1;
int c2,ok2;
int i;
  if(end<0)return -2;
  if(start<0)return -2;
  if(end<start)return -2;
  ok1=ok2=1;
  for(i=0;i<=end ;s1++,s2++,i++){
    if(usecase == NO){
      c1=toupper(*s1);
      c2=toupper(*s2);
    }
    else {
      c1=*s1;
      c2=*s2;
    }
    if(ok1 && (c1 == 0))ok1=0;
    if(ok2 && (c2 == 0))ok2=0;
    if(ok1 && ok2){
      if((i>=start) && (i<=end)){
        if(c1 < c2)return -1;
        if(c1 > c2)return  1;
      }
    }
    else if(ok1){
      if(i>=start)return 1;   /*s2 terminated, s1 did not, s1 is bigger*/
    }
    else if(ok2){
      if(i>=start)return -1;  /*s1 terminated, s2 did not, s2 is bigger*/
    }
    else {
      if(i>=start)return 0;   /* both terminated within comparison region, equivalent */
      return -2;              /* both terminated before comparison, no comparisoin possible */
    }
  }
  return 0;  /* over range they are identical*/
}

void insane(char *string){
 (void) fprintf(stderr,"%s\n",string);
 exit(EXIT_FAILURE);
}

void emit_help(void){
(void) fprintf(stdout,"uniqmerge command summary:\n");
(void) fprintf(stdout,"\n");
(void) fprintf(stdout,"  This program merges the output of up to %d files.\n",MAXINFILE);
(void) fprintf(stdout,"  Lines that appear in <=N or >=N of the input files are emitted.\n");
(void) fprintf(stdout,"  Files may be tagged so that the tagged blocks of lines are merged.\n");
(void) fprintf(stdout,"  A tagged block begins with a line starting with a tag string\n");
(void) fprintf(stdout,"  followed by zero or more lines to be analyzed.\n");
(void) fprintf(stdout,"  Each tagged file must have the same number of tagged blocks\n\n");
(void) fprintf(stdout,"  syntax: uniqmerge [options] outfile file1 file2  ... file%d\n",MAXINFILE);
(void) fprintf(stdout,"          outfile    = \"-\" directs output to stdout.\n");
(void) fprintf(stdout,"          input file = \"-\" reads input from stdin (only one may do so).\n\n");
(void) fprintf(stdout,"  Command line options are.\n\n");
(void) fprintf(stdout,"   -tag STRING    tag indicator string. Default is no tags. \n");
(void) fprintf(stdout,"   -count         prefix output lines with the number of file occurrences\n");
(void) fprintf(stdout,"   -source        prefix output lines with the number of the source file\n");
(void) fprintf(stdout,"   -tcount        prefix output tags  with the number of the tag\n");
(void) fprintf(stdout,"   -nocase        ignore differences in case when comparing\n");
(void) fprintf(stdout,"   -sc            start character to compare (default = 1)\n");
(void) fprintf(stdout,"   -ec            end character to compare (default = last in line)\n");
(void) fprintf(stdout,"   -nc            number of characters to compare (use -ec OR -nc)\n");
(void) fprintf(stdout,"   -ge N          print lines occurring >=N  times\n");
(void) fprintf(stdout,"   -le N          print lines occurring <=N  times (default, N=1)\n");
(void) fprintf(stdout,"   -sort TYPE     before emitting sort strings. Types are :\n");
(void) fprintf(stdout,"                    [s=string,n=numeric][+=ascending,-=descending], Default: s+\n");
(void) fprintf(stdout,"   -ssize N       maximum # of strings in equivalent tagged blocks\n");
(void) fprintf(stdout,"                    or untagged whole files.  default = %d\n",DEFSTACK);
(void) fprintf(stdout,"   -h             help message      (also -help --h --help -? --?)\n");
(void) fprintf(stdout,"   -i             emit version, copyright, license and contact information and exit\n\n");
exit(EXIT_FAILURE);
}




void  process_command_line_args(int argc,char **argv, char **tstring,
       int *lcount, int *usecase, int *sc, int *ec, int *nc,
       int *direction, int *replicas, int *sorttype,
       int *stacksize, int *ninfiles, FILE **outfile){

int  numarg=0;
int  i;
int  doingfiles=0;


  if(argc <2){ /* not enough arguments */
    emit_help();
  }
  
  *tstring    =  NULL;
  *lcount     =  NO;
  *usecase    =  YES;
  *sc         =  1;
  *ec         = -1; /*EOL*/
  *nc         = -1;
  *direction  =  LE;
  *replicas   =  1;
  *sorttype   =  SORTSA;
  *stacksize  =  2*DEFSTACK;
  *replicas=-1;
  *ninfiles=-1;
  *outfile=NULL;
  
  for (i=0; i<MAXINFILE; i++){
    infiles[i]=NULL;
  }
  
  
  while( ++numarg < argc){
    if( (lcl_strcasecmp(argv[numarg], "-h")==0)     ||
        (lcl_strcasecmp(argv[numarg], "--h")==0)    ||
        (lcl_strcasecmp(argv[numarg], "-?")==0)     ||
        (lcl_strcasecmp(argv[numarg], "--?")==0)    ||
        (lcl_strcasecmp(argv[numarg], "-help")==0)  ||
        (lcl_strcasecmp(argv[numarg], "--help")==0) ){
      emit_help();
    }
    else if(lcl_strcasecmp(argv[numarg], "-i")==0){
      (void)fprintf(stderr,"Version:    %s\n",BMVERSION);
      (void)fprintf(stderr,"MaxInFiles: %d\n",MAXINFILE);
      (void)fprintf(stderr,"bugs to:    mathog@caltech.edu\n");
      (void)fprintf(stderr,"Copyright:  2003 David Mathog and California Institute of Technology\n");
      (void)fprintf(stderr,"License terms:\n");
      (void)fprintf(stderr,"    You may run this program on any platform. You may\n");
      (void)fprintf(stderr,"    redistribute the source code of this program subject to\n");
      (void)fprintf(stderr,"    the condition that you do not first modify it in any way.\n");
      (void)fprintf(stderr,"    You may  distribute binary versions of this program so long\n");
      (void)fprintf(stderr,"    as they were compiled from unmodified source code.  There\n");
      (void)fprintf(stderr,"    is no charge for using this software.  You may not charge\n");
      (void)fprintf(stderr,"    others for the use of this software.\n");
      exit(EXIT_SUCCESS);
    }
    else if(lcl_strcasecmp(argv[numarg], "-tag")==0){
      numarg++;
      *tstring=argv[numarg];
    }
    else if(lcl_strcasecmp(argv[numarg], "-count")==0){
      *lcount |= COUNTYES;
    }
    else if(lcl_strcasecmp(argv[numarg], "-source")==0){
      *lcount |= SOURCEYES;
    }
    else if(lcl_strcasecmp(argv[numarg], "-tcount")==0){
      *lcount |= TAGYES;
    }
    else if(lcl_strcasecmp(argv[numarg], "-nocase")==0){
      *usecase=NO;
    }
    else if(lcl_strcasecmp(argv[numarg], "-sc")==0){
      setiposnumeric(sc,&numarg,argc,argv,"-sc");
    }
    else if(lcl_strcasecmp(argv[numarg], "-ec")==0){
      setiposnumeric(ec,&numarg,argc,argv,"-ec");
    }
    else if(lcl_strcasecmp(argv[numarg], "-nc")==0){
      setiposnumeric(nc,&numarg,argc,argv,"-nc");
    }
    else if(lcl_strcasecmp(argv[numarg], "-sort")==0){
      setsorttype(sorttype,&numarg,argc,argv,"-sort");
    }
    else if(lcl_strcasecmp(argv[numarg], "-ssize")==0){
      setiposnumeric(stacksize,&numarg,argc,argv,"-ssize");
      *stacksize *= 2;  /* reserve enough space to be able to merge sort inside the stack */
    }
    else if(lcl_strcasecmp(argv[numarg], "-le")==0){
      setiposnumeric(replicas,&numarg,argc,argv,"-le");
      *direction=LE;
    }
    else if(lcl_strcasecmp(argv[numarg], "-ge")==0){
      setiposnumeric(replicas,&numarg,argc,argv,"-ge");
      *direction=GE;
    }
     else if(strcmp(argv[numarg], "-")==0 && ! doingfiles){
       *outfile=stdout;
       doingfiles=1;
    }
    else if(strcmp(argv[numarg], "-")==0 && doingfiles){
       if(doingfiles>1)insane("uniqmerge: fatal error, \"-\" may only be used for one input file");
       (*ninfiles)++;
       if(*ninfiles >= MAXINFILE)insane("uniqmerge: fatal error, too many input files");
       infiles[*ninfiles]=stdin;
       doingfiles++;
    }
    else if(*argv[numarg] == '-' ){
       (void) fprintf(stderr,"uniqmerge: fatal error: on command line [%s] is not recognized\n",argv[numarg]);
       exit(EXIT_FAILURE);
    }
    else{
      if(! doingfiles){ /* this is the output file, stdout is allowed (see above) */
        doingfiles=1;
        *outfile=fopen(argv[numarg],"w");
        if(*outfile == NULL)insane("uniqmerge: fatal error, could not create output file");
      }
      else {
        (*ninfiles)++;
        if(*ninfiles >= MAXINFILE)insane("uniqmerge: fatal error, too many input files");
        infiles[*ninfiles]=fopen(argv[numarg],"r");
        if(infiles[*ninfiles] == NULL){
          (void) fprintf(stderr,"uniqmerge: fatal error, could not open input file %s\n",argv[numarg]);
          exit(EXIT_FAILURE);
        }
      }
    }
  }

  /* sanity checking */

  if((*nc != -1) && (*ec != -1))insane("uniqmerge: fatal error: specify -ec or -nc but not both");
  if(*ninfiles <= 0)insane("uniqmerge: fatal error: at least two input files are required");
  (*ninfiles)++;
  (*sc)--;           /*external is 1-N, internal is 0-N-1*/
  if(*sc < 0)insane("uniqmerge: fatal error: -sc value must be >=1");
  if(*ec>0)(*ec)--;   /*external is 1-N, internal is 0-N-1*/
  if((*ec != -1) && (*ec<*sc))insane("uniqmerge: fatal error: -ec value must be >= -sc value");
  if(*nc == -1){                   
    if(*ec == -1)*ec = MYMAXSTRING; 
  }                               
  else {                          
    *ec = *sc + *nc - 1;             
  }                               
  if(*replicas == -1){  /* default, find unique lines */
    *direction=LE;
    *replicas=1;
  }
}


int main(int argc, char *argv[]){
/* command line values */
int   ninfiles;  /* Number of input files */
FILE *outfile;   /* file pointer for output file */
int   tag;       /* current tag being processed on all files */
int   notdone;   /* number of files still being read from */
int   stacksize; /* stack size, duh! */
int   stackptr,holdstackptr;
int   replicas;
int   lcount,usecase,sc,ec,nc,direction,sorttype;

char *cptr;
char *tstring;   /* tag string */
char  holdstring[MYMAXSTRING];
char  deftstring[2];
int   tslen;
int   i;
char  **str_stack;
int    *int_stack;  /* holds count of matching strings */
int    *add_stack;  /* indicates which file it was added from */
double *d_stack;    /* double stack, used for numerical comparisons */


  (void)strcpy(deftstring,"#");
  tstring=deftstring;
  process_command_line_args(argc,argv,&tstring,&lcount,&usecase,&sc,&ec,&nc,
      &direction,&replicas,&sorttype,
      &stacksize,&ninfiles,&outfile);


  str_stack = malloc(stacksize * sizeof(char *));
  int_stack = malloc(stacksize * sizeof(int));
  d_stack   = malloc(stacksize * sizeof(double));
  add_stack = malloc(stacksize  *sizeof(int));

  if(str_stack==NULL)insane("uniqmerge, could not allocate string stack");
  if(int_stack==NULL)insane("uniqmerge, could not allocate int stack");
  if(d_stack  ==NULL)insane("uniqmerge, could not allocate double stack");
  if(add_stack==NULL)insane("uniqmerge, could not allocate add stack");

  tslen=0;
  if(tstring!=NULL)tslen=strlen(tstring);
  
  for(i=0; i<MAXINFILE; i++){
    buffers[i]=malloc(sizeof(char)*MYMAXSTRING);
    if(buffers[i]==NULL)insane("uniqmerge: fatal error: could not allocate memory for input line buffers");
  }

  notdone=ninfiles;
  tag=0;
  (void)strcpy(holdstring,"Before first tag");
  while( notdone > 0){
    tag++;

    stackptr=-1;
    *holdstring='\0';
    for(i=0;i<ninfiles;i++){
      if(i==0)empty_stack(str_stack,stacksize,&stackptr);
      holdstackptr=stackptr;
      while(1){
        cptr=read_line(buffers[i], infiles[i]);
        if(cptr==NULL){  /* EOF hit */
          if(*holdstring && i!=0)insane("uniqmerge: fatal error: input files contain different numbers of tags");
          notdone--;
          break;
        }
        if(tslen>0){
        
          if(lcl_strcmprange(buffers[i],tstring,0,tslen-1,usecase) == 0){
            if(i==0){
              strcpy(holdstring,buffers[0]);
            }
            else {
              if(*holdstring == '\0')insane("uniqmerge: fatal error: input files contain different numbers of tags");
            }
            break;
          }
        }
        push(i,str_stack,int_stack,add_stack,stacksize,&stackptr,buffers[i]);
      }
      /* all entries from this file/block on end of stack, sort that part */
      sort_stack(str_stack,int_stack,add_stack,d_stack,holdstackptr+1,&stackptr,sorttype,usecase,sc,ec,nc);
      /* merge entries with existing entries, if any */
      merge_stack(str_stack,int_stack,add_stack,d_stack,stacksize,holdstackptr+1,&stackptr,sorttype,usecase,sc,ec,nc);
      
    }
    if(notdone==0)tag=-1;
    stack_dump(outfile,holdstring,tag,direction,replicas,lcount,str_stack,int_stack,add_stack,stackptr);
  }
  exit(EXIT_SUCCESS);
}

void push(int from,char **str_stack,int *int_stack,
  int *add_stack,int stacksize,int *stackptr,char *string){
char *stemp;
  (*stackptr)++;
  if(*stackptr >= stacksize)insane("uniqmerge: fatal error, too many merge lines");
  stemp=malloc(sizeof(char) * strlen(string) + 1);
  if(stemp==NULL)insane("uniqmerge: fatal error, ran out of memory");
  (void) strcpy(stemp,string);
  str_stack[*stackptr]=stemp;   /* string */
  int_stack[*stackptr]=1;       /* 1 appearance in input files*/
  add_stack[*stackptr]=from;    /* which file stream it is from */
}

void empty_stack(char **str_stack,int stacksize,int *stackptr){
  int i;
  for(i=*stackptr;i>=0;i--){
    free(str_stack[i]);
    str_stack[i]=NULL;
  }
  *stackptr=-1;
}


/* add_stack now containers position indicators for the stack in sorted
order (if any).  */
void stack_dump(FILE *outfile, char *tstring, int tag,
          int direction, int replicas, int lcount,
          char **str_stack,int *int_stack,int *add_stack, int stackptr){
int i;
int printit;

  for(i=0;i<=stackptr;i++){
    printit=0;
    if(        (direction==GE) && (int_stack[i]>=replicas)){ printit=1;;}
    else if(   (direction==LE) && (int_stack[i]<=replicas)){ printit=1;;}
    if(printit==1){
      if(lcount & COUNTYES  ){  (void)fprintf(outfile,"%8d ",int_stack[i]); }
      if(lcount & SOURCEYES ){  (void)fprintf(outfile,"%8d ",add_stack[i]+1); }
      (void)fprintf(outfile,"%s\n",str_stack[i]);
    }
  }
  if( (tag>0) && (*tstring != '\0')){
    if(lcount & TAGYES)(void)fprintf(outfile,"%8d ",tag);
    (void)fprintf(outfile,"%s \n",tstring);
  }
}

/* uses combsort to sort in place.  This variant includes an automatic
restart if it goes past gap=1 and still swapped.  Note that since the region
being swapped was all just entered all add_stack and int_stack values
are the same and need not be moved */

void sort_stack(char **str_stack,int *int_stack,int *add_stack,double *d_stack,
                int frontptr, int *stackptr,int sorttype,int usecase,int sc,int ec, int nc){
int    i,j;
double shrink=1.3;
double dval;
int    gap;
int    swaps;
int    done;
char  *stemp;
char   chold;
int    sortdir;
int    restart;


    if(frontptr < 0)frontptr=0;
    if(*stackptr <= frontptr)return;  /* nothing to sort */
    sortdir=1;
    switch(sorttype){
      case SORTSD:
        sortdir = -1; /* This falls through intentionally!!! */
      case SORTSA:
        for(done=0; !done; ){
           restart=0;
           gap = (*stackptr-frontptr)/2 - 1;
           if(gap<1)gap=1;
           for( ;gap>=1;gap/=shrink){
             if(gap<1)gap=1;
             swaps=0;
             for(i=frontptr,j=frontptr+gap;j<=*stackptr;i++,j++){
               if(lcl_strcmprange(str_stack[i],str_stack[j],sc,ec,usecase)==sortdir){
                 swaps++;
                 MYSWAP(stemp,str_stack[i],str_stack[j])
               }
             }
             if(gap==1){
               if(swaps==0){
                 done=1;
                 break;
               }
               else {
                 restart++;
                 if(restart >= 7)break;
               }
             }
           }
        }

        /* remove duplicates within the sorted list*/
        for(i=frontptr,j=i;j<=*stackptr;){
          if(lcl_strcmprange(str_stack[i],str_stack[j],sc,ec,usecase)==0){
            j++;
          }
          else{
            i++;
            if(i!=j){
              str_stack[i]=str_stack[j];
              str_stack[j]=NULL;
            }
            j++;
          }
        }
        *stackptr=i;
        break;
      case SORTND:
        sortdir = -1; /* This falls through intentionally!!! */
      case SORTNA:
        /* extract numerical values.  Any exceptions is a fatal error */
        for(i=frontptr;i<=*stackptr;i++){
          if(sc > strlen(str_stack[i]))insane("uniqmerge: fatal error: -sc is beyond the end of an input line");
          if(ec < strlen(str_stack[i])){
            chold=str_stack[i][ec];
            str_stack[i][ec]='\0';
            if(sscanf( &((str_stack[i])[sc]) ,"%lf",&dval)<=0)insane("uniqmerge: fatal error: invalid numeric value encountered");
            str_stack[i][ec]=chold;
          }
          else {
            if(sscanf( &((str_stack[i])[sc]),"%lf",&dval)<=0)insane("uniqmerge: fatal error: invalid numeric value encountered");
          }
          d_stack[i]=dval;
        }
        for(done=0; !done; ){
           restart=0;
           gap = (*stackptr-frontptr)/2 - 1;
           if(gap<1)gap=1;
           for( ;gap>=1;gap/=shrink){
             if(gap<1)gap=1;
             swaps=0;
             for(i=frontptr,j=frontptr+gap;j<=*stackptr;i++,j++){
               if( ( sortdir==1  && d_stack[i]<d_stack[j])  ||
                   ( sortdir==-1 && d_stack[i]>d_stack[j])
                 ){
                 swaps++;
                 MYSWAP(stemp,str_stack[i],str_stack[j])
                 MYSWAP(dval ,  d_stack[i],  d_stack[j])
               }
             }
             if(gap==1){
               if(swaps==0){
                 done=1;
                 break;
               }
               else {
                 restart++;
                 if(restart >= 7)break;
               }
             }
           }
        }
        /* remove duplicates within the sorted list*/
        for(i=frontptr,j=i+1;j<=*stackptr;){
          if(d_stack[i] == d_stack[j]){
            j++;
          }
          else{
            i++;
            if(i!=j){
              str_stack[i]=str_stack[j];
              d_stack[i]  =d_stack[j];
              str_stack[j]=NULL;
            }
            j++;
          }
        }
        *stackptr=i;
        break;
      default:
        insane("uniqmerge: fatal programming error: unknown sort type encountered");
    }
}

/* two lists, 0 -> frontptr -1, frontptr -> stackptr.  Do a merge sort */
void merge_stack(char **str_stack,int *int_stack,int *add_stack,double *d_stack,
                int stacksize, int frontptr, int *stackptr,
                int sorttype,int usecase,int sc,int ec, int nc){
int    i,j;
int    done;
int    sortdir;
int    s1,e1,s2,e2,out;


    
    if(frontptr<=0)return;
    if(stacksize - *stackptr < *stackptr)insane("uniqmerge: fatal error: out of stack space, increase -ssize parameter");

    /* shift the upper one to the very end of the array.*/
    for(i=*stackptr,j=stacksize-1; i>=frontptr ;i--,j--){
      str_stack[j] =  str_stack[i];
      add_stack[j] =  add_stack[i];
      int_stack[j] =  int_stack[i];
      d_stack[j]   =  d_stack[i];
      str_stack[i] =  NULL;
    }
    e2=stacksize-1;
    s2=j+1;
    
    /* shift the lower one up too */
    for(i=frontptr-1;i>=0;i--,j--){
      str_stack[j] =  str_stack[i];
      add_stack[j] =  add_stack[i];
      int_stack[j] =  int_stack[i];
      d_stack[j]   =  d_stack[i];
      str_stack[i] =  NULL;
    }
    
    
    
    s1=j+1;
    e1=s2-1;
    out=0;
    

    for(done=0; !done; out++ ){
   
      sortdir=-1;
      switch(sorttype){
        case SORTSD:
          sortdir = 1; /* This falls through intentionally!!! */
        case SORTSA:
          if(s1>e1){
            if(s2>e2){   return;     }
            else{        i=s2; s2++; }
          }
          else if(s2>e2){
            if(s1>e1){   return;     }
            else{        i=s1; s1++; }
          }
          else if(lcl_strcmprange(str_stack[s1],str_stack[s2],sc,ec,usecase)==0){
                         i=s1;  s1++;
                         (int_stack[i])++;
                         (*stackptr)--;
                         s2++;  /* duplicate, remove the UPPER one, which is the new one */
          }
          else if(lcl_strcmprange(str_stack[s1],str_stack[s2],sc,ec,usecase)==sortdir){
                         i=s1;  s1++;
          }
          else {         i=s2; s2++;
          }
          break;
        case SORTND:
          sortdir = 1; /* This falls through intentionally!!! */
        case SORTNA:
          if(s1>e1){
            if(s2>e2){   return;     }
            else{        i=s2; s2++; }
          }
          else if(s2>e2){
            if(s1>e1){   return;     }
            else{        i=s1; s1++; }
          }
          else if( d_stack[s1] == d_stack[s2] ){
                         i=s1;  s1++;
                         (int_stack[i])++;
                         (*stackptr)--;
                         s2++;  /* duplicate, remove the UPPER one, which is the new one */
          }
          else if( ( sortdir==1  && d_stack[s1]<d_stack[s2])  ||
                   ( sortdir==-1 && d_stack[s1]>d_stack[s2])
                 ){
                         i=s1;  s1++;
          }
          else {         i=s2; s2++;}
          break;
        default:
           insane("uniqmerge: fatal programming error: illegal sorttype in mergestack");
      }
      str_stack[out] =  str_stack[i];
      add_stack[out] =  add_stack[i];
      int_stack[out] =  int_stack[i];
      d_stack[out]   =  d_stack[i];
      str_stack[i]   =  NULL;
    }
}
