/*----------------------------------------------------------------------*\
 | Flux and mineral weight data supplied by the authors contain cells	|
 | with both letters and numbers; for example, a cell may contain the	|
 | string "x 0.000172".  The 'x' indicates that the value 0.000172 is	|
 | considered by the authors to be too high based on other information	|
 | about that sample; it serves as a "red flag" to make the user aware	|
 | that the numerical value should not be regarded as reliable.			|
 |																		|
 | In the data set, it is best to not combine letters and numbers in	|
 | the same cell, because spreadsheet and database programs must then	|
 | treat the cell differently from other cells in the same column.		|
 |																		|
 | I believe a better approach is to place the qualifying letter in a	|
 | separate column preceding the numerical value.  This must be done	|
 | with care, of course, since not all cells in a particular column		|
 | have qualifying letters; most do not.								|
 |																		|
 | This program is intended to separate the qualifying characters from	|
 | the numerical cell values while preserving the columnar arrangement.	|
 | Its procedure is to read each line and parse the line into tokens,	|
 | where each token is delimited by (a) the beginning of the line, (b)	|
 | a tab character, or (c) the end of the line.  Where it is needed,	|
 | each token is analyzed to determine whether it contains only numeric	|
 | data or a qualifying character and numeric data.  If it contains a	|
 | qualifying character, that character is output followed by a tab;	|
 | if there is no qualifying character, the tab is output alone.		|
 | The effect is to insert a column containing only the qualifying		|
 | characters (or empty cells where no qualifying character was found).	|
 | Not all columns need to be analyzed this way, however, so a "map"	|
 | is consulted.  The map is a character string consisting of the		|
 | characters '0' and '1'; 0 indicates the corresponding column should	|
 | be output as is without analysis, 1 indicates the column should be	|
 | analyzed as described, and two columns should be output for it.		|
 | The default map is "0111110"; the last character in the map is used	|
 | for all subsequent columns.  To use a different map, specify -map	|
 | on the command line.  In addition, the -h option may be used to		|
 | specify the number of lines at the beginning of the file that should	|
 | be output exactly as they are, without interpretation.  A space must	|
 | follow "-h" and "-map" on the command line.							|
 | Output is directed to stdout; redirect it to a file to save it.		|
 |																		|
 | Usage: fix_columns [-h number] [-map map_string] input_file			|
 |        where number is an integer and map_string is composed only of	|
 |        the characters '0' and '1'.									|
 |																		|
 | Peter N. Schweitzer (U.S. Geological Survey, Reston, VA 22092)		|
\*----------------------------------------------------------------------*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Size in bytes of the buffer used to read one line of input. */
enum { MAXLEN = 4096 };

/* Map character that marks a column to be split into two columns. */
enum { SPLIT = '1' };

/* Column map used when no map is given on the command line. */
static char default_map[] = "0111110";

/*----------------------------------------------------------------------*\
 | Print the usage message on stderr and return the failure status so
 | that callers can write "return usage (...)".
\*----------------------------------------------------------------------*/

static int usage (const char *program) {
	fprintf (stderr,"Usage: %s [-h count] [-m map] input_file\n",
		program ? program : "fix_columns");
	return EXIT_FAILURE;
	}

/*----------------------------------------------------------------------*\
 | Write the bytes in the half-open range [first,last) to stdout.
\*----------------------------------------------------------------------*/

static void put_range (const char *first, const char *last) {
	if (first < last)
		fwrite (first,1,(size_t) (last - first),stdout);
	}

/*----------------------------------------------------------------------*\
 | Emit a token that is to be copied as is.  b points at its first byte
 | and e at the tab or NUL that ends it.  The delimiter is copied only
 | when it is a tab; the NUL that ends the line is never written.
\*----------------------------------------------------------------------*/

static void emit_plain (const char *b, const char *e) {
	put_range (b,e);
	if (*e == '\t') fputc ('\t',stdout);
	}

/*----------------------------------------------------------------------*\
 | Emit a token as two columns: the qualifying character (or an empty
 | cell) followed by the value.  b points at the token's first byte and
 | e at the tab or NUL that ends it.
\*----------------------------------------------------------------------*/

static void emit_split (const char *b, const char *e) {

	/* Skip leading spaces (these cannot be tabs) */
	while (b < e && *b == ' ') b++;

	/*------------------------------------------------------------------*\
	 | An empty cell becomes two empty cells when a tab follows it; at
	 | the end of the line nothing is emitted.
	\*------------------------------------------------------------------*/

	if (b == e) {
		if (*e == '\t') fputs ("\t\t",stdout);
		return;
		}

	/*------------------------------------------------------------------*\
	 | A token that starts like a number has no qualifier, so its
	 | qualifier cell is empty.  Otherwise the first byte is the
	 | qualifier; spaces between it and the value are dropped.
	 | The cast keeps isdigit() defined for bytes above 127.
	\*------------------------------------------------------------------*/

	if (isdigit ((unsigned char) *b) || *b == '+' || *b == '-' || *b == '.')
		fputc ('\t',stdout);
	else {
		fputc (*b++,stdout);
		fputc ('\t',stdout);
		while (b < e && *b == ' ') b++;
		}

	put_range (b,e);
	if (*e == '\t') fputc ('\t',stdout);
	}

/*----------------------------------------------------------------------*\
 | Program entry point.
 |
 | Arguments: any argument beginning with "-h" is followed by the number
 | of leading lines to copy verbatim; any argument beginning with "-m"
 | is followed by the column map; any other argument names the input
 | file (the last one given wins).
 |
 | Returns EXIT_SUCCESS when the file was processed, EXIT_FAILURE on a
 | usage error, when the input file cannot be opened, or on a read error.
 |
 | Limitation: a data line longer than MAXLEN-1 bytes is read in pieces
 | and each piece is processed as if it were a separate line.
\*----------------------------------------------------------------------*/

int main (int argc, char *argv[]) {
	int i;
	int status = EXIT_SUCCESS;
	long header_line_count = 0;
	const char *program = argc > 0 ? argv[0] : NULL;
	const char *input_file = NULL;
	const char *map = default_map;
	const char *m,*b,*e,*end;
	char line [MAXLEN];
	char *s;
	FILE *in;

	for (i=1; i < argc; i++) {
		if (strncmp (argv[i],"-h",2) == 0) {
			char *rest;

			/* The option needs a value: a non-negative integer */
			if (++i >= argc) return usage (program);
			header_line_count = strtol (argv[i],&rest,10);
			if (rest == argv[i] || *rest || header_line_count < 0)
				return usage (program);
			}
		else if (strncmp (argv[i],"-m",2) == 0) {
			if (++i >= argc) return usage (program);
			map = argv[i];
			}
		else
			input_file = argv[i];
		}

	if (!input_file)
		return usage (program);

	in = fopen (input_file,"r");
	if (!in) {
		fprintf (stderr,"Error: could not open input file %s\n",input_file);
		return EXIT_FAILURE;
		}

	/*------------------------------------------------------------------*\
	 | Copy the header lines exactly as they are.  A line is counted
	 | only when its newline has been seen, so a header line longer
	 | than the buffer still counts once.
	\*------------------------------------------------------------------*/

	while (header_line_count > 0 && fgets (line,MAXLEN,in)) {
		fputs (line,stdout);
		if (strchr (line,'\n')) header_line_count--;
		}

	while (fgets (line,MAXLEN,in)) {
		if ((s = strrchr (line,'\n')) != NULL) *s = 0;
		if ((s = strrchr (line,'\r')) != NULL) *s = 0;

		end = line + strlen (line);
		m = map;
		b = line;

		while (b < end) {

			/*----------------------------------------------------------*\
			 | Delimit a token, with b pointing at its first byte and
			 | e pointing at either a NUL or a tab.
			\*----------------------------------------------------------*/

			for (e=b; *e && *e != '\t'; e++);

			/* What we do with the token depends on the map character */
			switch (*m) {
				case SPLIT:
					emit_split (b,e);
					break;
				default:
					emit_plain (b,e);
					break;
				}

			/* Step past the tab; at the NUL stay put so the loop ends */
			b = *e ? e+1 : e;

			/*----------------------------------------------------------*\
			 | The last map character applies to all remaining columns.
			 | Testing *m first keeps an empty map from being read past
			 | its terminating NUL.
			\*----------------------------------------------------------*/

			if (*m && *(m+1)) m++;
			}
		fputc ('\n',stdout);
		}

	if (ferror (in)) {
		fprintf (stderr,"Error: could not read input file %s\n",input_file);
		status = EXIT_FAILURE;
		}
	fclose (in);
	return status;
	}
