/* peakdetect.C
28-APR-2003, David Mathog, Biology Division, Caltech

Reads a file consisting of column formatted data.  Detects
peaks beyond cutoff for a specified column (min or max) subject
to an optional column matching criteria (so that multiple ranges
may be processed independently within one input file.

Compiles cleanly with:
  gcc -Wall -ansi -pedantic -o peakdetect peakdetect.c
  
Versions:
1.0.1  28-APR-2003  minor bug fix, command line wanted pmin/pmax
        but help menu said min/max.
1.0.0  10-APR-2003  First release
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <float.h>  /* for FLT_MIN FLT_MAX */
#include <limits.h> /* for INT_MAX */

/* definitions */
#define VERSTRING "1.0.1 28-APR-2003"
#define MYMAXSTRING 1000000
#define MYMAXCOLS   1000

/* function prototypes */
void emit_help(void);
void insane(char *string);
int  lcl_strcasecmp(char *s1, char *s2);
void process_command_line_args(int argc,char **argv, int *pcol,
   int *rcol, int *xcol, int *pwidth, double *pmax, double *pmin);
void setdnumeric(double *val,int *numarg,int argc,char **argv,char * label);
void setinumeric(int *val,int *numarg,int argc,char **argv,char * label);
void setiposnumeric(int *val,int *numarg,int argc,char **argv,char * label);

/* functions */
void emit_help(void){
(void) fprintf(stderr,"peakdetect: detect peaks in columnar numeric data\n");
(void) fprintf(stderr,"            read from stdin, write to stdout\n\n");
(void) fprintf(stderr,"peakdetect command line summary:\n\n");
(void) fprintf(stderr,"   -pcol N       detect peaks in this column (default = 1, first column)\n");
(void) fprintf(stderr,"   -pmax  val    detect peaks > val (default, -FLT_MAX, max peak detection enabled)\n");
(void) fprintf(stderr,"   -pmin  val    detect peaks < val (default, -FLT_MAX, min peak detection disabled) \n");
(void) fprintf(stderr,"                 Mandatory: pmin < pmax value unless both are +/-FLT_MAX\n");
(void) fprintf(stderr,"   -rcol N       column whose value defines a range.\n");
(void) fprintf(stderr,"                 peaks are detected independently in each range.\n");
(void) fprintf(stderr,"   -pwidth N     maximum peak width (optional)\n");
(void) fprintf(stderr,"   -xcol N       measure width with values from this column instead of row numbers(optional).\n");
(void) fprintf(stderr,"   -h            print this help message (also -help --h --help -? --?)\n");
(void) fprintf(stderr,"   -i            emit version, copyright, license and contact information\n\n");
exit(EXIT_FAILURE);
}

void insane(char *string){
 (void) fprintf(stderr,"%s\n",string);
 exit(EXIT_FAILURE);
}

int  lcl_strcasecmp(char *s1, char *s2){
int c1;
int c2;
  for(; ;s1++,s2++){
    c1=toupper(*s1);
    c2=toupper(*s2);
    if(c1 < c2)return -1;
    if(c1 > c2)return  1;
    if(c1 == 0)return  0;  /*c2 also is 0 in this case */
  }
}

void setinumeric(int *val,int *numarg,int argc,char **argv,char * label){
      (*numarg)++;
      if( ( *numarg >= argc ) || (argv[*numarg] == NULL)){
        (void) fprintf( stderr, "%s: missing argument\n",label);
        exit(EXIT_FAILURE);
      }
      if(sscanf(argv[*numarg],"%d",val) != 1){
        (void) fprintf(stderr,"Bad integer argument/parameter [%s %s] \n",label,argv[*numarg]);
        exit(EXIT_FAILURE);
      }
}

void setiposnumeric(int *val,int *numarg,int argc,char **argv,char * label){
      (*numarg)++;
      if( ( *numarg >= argc ) || (argv[*numarg] == NULL)){
        (void) fprintf( stderr, "%s: missing argument\n",label);
        exit(EXIT_FAILURE);
      }
      if(sscanf(argv[*numarg],"%d",val) != 1){
        (void) fprintf(stderr,"Bad integer argument/parameter [%s %s] \n",label,argv[*numarg]);
        exit(EXIT_FAILURE);
      }
      if(*val <= 0){
        (void) fprintf(stderr,"Illegal nonpositive integer argument/parameter [%s %s] \n",label,argv[*numarg]);
        exit(EXIT_FAILURE);
      }
}

void setdnumeric(double *val,int *numarg,int argc,char **argv,char * label){
      (*numarg)++;
      if( ( *numarg >= argc ) || (argv[*numarg] == NULL)){
        (void) fprintf( stderr, "%s: missing argument\n",label);
        exit(EXIT_FAILURE);
      }
      if(sscanf(argv[*numarg],"%lf",val) != 1){
        (void) fprintf(stderr,"Bad float argument/parameter [%s %s] \n",label,argv[*numarg]);
        exit(EXIT_FAILURE);
      }
}

void process_command_line_args(int argc,char **argv, int *pcol,
   int *rcol, int *xcol, int *pwidth, double *pmax, double *pmin){

 int numarg=0;
 
 *pcol=1;
 *rcol=0;           /* disabled */
 *xcol=0;           /* disabled */
 *pwidth=INT_MAX;   /* disabled, bigger than any real input file */
 *pmax=-FLT_MAX;    /* appropriate for MAX peak, default */
 *pmin=-FLT_MAX;    /* disables MIN peak detection */

 while( ++numarg < argc){
    if( (lcl_strcasecmp(argv[numarg], "-h")==0)     ||
        (lcl_strcasecmp(argv[numarg], "--h")==0)    ||
        (lcl_strcasecmp(argv[numarg], "-?")==0)     ||
        (lcl_strcasecmp(argv[numarg], "--?")==0)    ||
        (lcl_strcasecmp(argv[numarg], "-help")==0)  ||
        (lcl_strcasecmp(argv[numarg], "--help")==0) ){
      emit_help();
    }
    else if(lcl_strcasecmp(argv[numarg], "-i")==0){
      (void)fprintf(stderr,"Version:   %s\n",VERSTRING);
      (void)fprintf(stderr,"bugs to:   mathog@caltech.edu\n");
      (void)fprintf(stderr,"Copyright: 2003 David Mathog and California Institute of Technology\n");
      (void)fprintf(stderr,"License terms:\n");
      (void)fprintf(stderr,"    You may run this program on any platform. You may\n");
      (void)fprintf(stderr,"    redistribute the source code of this program subject to\n");
      (void)fprintf(stderr,"    the condition that you do not first modify it in any way.\n");
      (void)fprintf(stderr,"    You may  distribute binary versions of this program so long\n");
      (void)fprintf(stderr,"    as they were compiled from unmodified source code.  There\n");
      (void)fprintf(stderr,"    is no charge for using this software.  You may not charge\n");
      (void)fprintf(stderr,"    others for the use of this software.\n");
      exit(EXIT_SUCCESS);
    }
    else if(lcl_strcasecmp(argv[numarg], "-pcol")==0){
      setinumeric(pcol,&numarg,argc,argv,"-pcol");
      continue;
    }
    else if(lcl_strcasecmp(argv[numarg], "-rcol")==0){
      setinumeric(rcol,&numarg,argc,argv,"-rcol");
      continue;
    }
    else if(lcl_strcasecmp(argv[numarg], "-xcol")==0){
      setinumeric(xcol,&numarg,argc,argv,"-xcol");
      continue;
    }
    else if(lcl_strcasecmp(argv[numarg], "-pmax")==0){
      setdnumeric(pmax,&numarg,argc,argv,"-pmax");
      continue;
    }
    else if(lcl_strcasecmp(argv[numarg], "-pmin")==0){
      setdnumeric(pmin,&numarg,argc,argv,"-pmin");
      continue;
    }
    else if(lcl_strcasecmp(argv[numarg], "-pwidth")==0){
      setiposnumeric(pwidth,&numarg,argc,argv,"-pwidth");
      continue;
    }
    else {
      (void) fprintf(stderr,"Unknown command line argument: %s\n",argv[numarg]);
      exit(EXIT_FAILURE);
      continue;
    }
  }
  if(*pmin > *pmax)insane("peakdetect, fatal error, pmin >= pmax ");
  if(*pmin == *pmax && (*pmin != FLT_MAX || *pmin != -FLT_MAX)){
    insane("peakdetect, fatal error, pmin == pmax  and not FLT_MAX or -FLT_MAX");  
  }
}

int main(int argc, char *argv[]){
char  bigstring[MYMAXSTRING];
char  peakstring[MYMAXSTRING];
char *newline;
char *cptr;
char *eptr;
int   i,peakcount,lastpeak,thisrange,readcols;
int   pcol,rcol,xcol,pwidth;
int   lcount,emitpeak,newpeak,lpos;
double peakval,thisval;
double pmax,pmin;
double vals[MYMAXCOLS];
  
  peakcount =  0;
  lastpeak  = -1;
  thisrange = -1;
  lcount    =  0;
  process_command_line_args(argc, argv, &pcol,&rcol,&xcol, &pwidth, &pmax, &pmin);
  
  while( fgets(bigstring,MYMAXSTRING,stdin) != NULL){
    emitpeak=0;
    newpeak=0;
    lcount++;
    newline=strstr(bigstring,"\n");
    if(newline == NULL){   /* string truncated */
      (void) fprintf(stderr,"peakdetect input record exceeds 1M characters\n"); 
      (void) fprintf(stderr,"peakdetect failed.  %d peaks emitted\n",peakcount);
      exit(EXIT_FAILURE);
    }
    *newline='\0';  /* replace the \n with a terminator */
    
    /* extract all columns float values until end of string,error, or no more columns */
    cptr=bigstring;
    for(i=0;i<MYMAXCOLS;i++){
      vals[i]=strtod(cptr,&eptr);
      if(vals[i]==0.0 && cptr==eptr){
         /* bad input value, terminate reads*/
         i--;
         break;
      }
      cptr=eptr;
    }
    readcols=i+1;

    /* at this point read readcols of numeric data */
    if(pcol > readcols)insane("peakdetect failed - input line did not include peak column");
    if(rcol > readcols)insane("peakdetect failed - input line did not include range column");
    if(xcol > readcols)insane("peakdetect failed - input line did not include xcol column");
    thisval=vals[pcol-1];
    if(rcol > 0){
      if(thisrange != vals[rcol-1]){ /* New range */
         thisrange = vals[rcol-1];
         if(lastpeak!=-1)emitpeak=1;
         lastpeak=-1;
      }
    }
    if(xcol > 0){ lpos = vals[xcol-1]; }
    else {        lpos = lcount;       }

    
    if(thisval >= pmax){                                           /* could be a new max peak */
      if(lastpeak == -1){         newpeak=1;   }                   /* start a new peak detection */
      else {                                                       /* inside a possible peak now  */
        if(peakval <= pmin){      emitpeak=1;                   }  /* terminate an existing MIN peak*/
        else{
          if(thisval >= peakval){   newpeak=1;   }                 /* this point is higher */
          else{
            if(lpos - lastpeak >= pwidth){ emitpeak=newpeak=1;  }  /* terminate by width */
          }
        }
      }
    }
    else if(thisval <= pmin){ /* could be a new min peak */
      if(lastpeak == -1){         newpeak=1;   }                   /* start a new peak detection */
      else {                                                       /* inside a possible peak now  */
        if(peakval >= pmax){        emitpeak=1;                 }  /* terminate an existing MAX peak*/
        else {
          if(thisval <= peakval){   newpeak=1;   }                 /* this point is lower */
          else{
            if(lpos - lastpeak >= pwidth){ emitpeak=newpeak=1;  }  /* terminate by width */
          }
        }
      }
    }
    else{ /* not a peak */
      if(lastpeak != -1){ emitpeak=1; }                            /* terminate ones though */
    }
    if(emitpeak){
      (void) fprintf(stdout,"%s\n",peakstring);
      lastpeak=-1;
    }
    if(newpeak){
       lastpeak = lpos;
       peakval  = thisval;
       strcpy(peakstring,bigstring);
    }
  }
  if(lastpeak != -1){
    (void) fprintf(stdout,"%s\n",peakstring);
  }
  exit(EXIT_SUCCESS);
}
