/* Copyright 2002 Daniel Egnor.  See LICENSE file.
 *
 * This program is part of the processing chain to convert TIGER/Line data
 * into an index ("map") for rapid address lookup.  It's the very first step,
 * which reads TIGER/Line records and outputs "step1" data.  It keeps internal 
 * sorted tables to join the various TIGER tables against each other. */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

struct r4 {
	char id[10];
	int r5id;
} *r4 = NULL;
int r4_alloc = 0,r4_used = 0,r4_unmatched = 0;

struct r5 {
	char id[8];
	char name[38];
} *r5 = NULL;
int r5_alloc = 0,r5_used = 0;

struct r6 {
	char id[10];
	char addr[58];
} *r6 = NULL;
int r6_alloc = 0,r6_used = 0,r6_unmatched = 0;

#define RESERVE(array)							\
	if (array##_used < array##_alloc) ; else			\
		array = realloc(array,sizeof *array *			\
			(array##_alloc =  array##_alloc ? array##_alloc*2 : 10))

#define SEARCH(array,key,first) do {					\
	int last = array##_used;					\
	first = 0;							\
	while (first < last) {						\
		const int middle = first + (last - first) / 2;		\
		if (memcmp(array[middle].id,key,sizeof array->id) < 0)	\
			first = middle + 1;				\
		else 							\
			last = middle;					\
	}								\
} while (0)

static int r4_compare(const void *a,const void *b) {
	return memcmp(a,b,sizeof r4->id);
}

static int r5_compare(const void *a,const void *b) {
	return memcmp(a,b,sizeof r5->id);
}

static int r6_compare(const void *a,const void *b) {
	return memcmp(a,b,sizeof r6->id);
}

static int is_good(const char *line,int length) {
	static int warned_version = 0;
	if (strlen(line) < length) {
		fprintf(stderr,"warning: truncated '%c' record\n",*line);
		return 0;
	}
	if (!warned_version && (memcmp(line + 3,"04",2) && memcmp(line + 3,"05",2))) { // TIGER Second Edition produced in 2004 and 2005
		fprintf(stderr,"warning: unexpected version code %.4s\n",line + 1);
		warned_version = 1;
	}
	return 1;
}

static void print_side(char side,
	const char *addr,const char *zip,
	const char *geo,const char *name) 
{
	const char *addr1,*addr2,*geo1,*geo2;
/*	char buffer[41]; */

	if (memcmp(addr,addr + 11,11) > 0) {
		addr1 = addr + 11;
		addr2 = addr;
		geo1 = geo + 19;
		geo2 = geo;
	}
	else {
		addr1 = addr;
		addr2 = addr + 11;
		geo1 = geo;
		geo2 = geo + 19;
	}

/*
	memcpy(buffer,name,2);
	buffer[2] = ' ';
	memcpy(buffer + 3,name + 2,30);
	buffer[33] = ' ';
	memcpy(buffer + 34,name + 32,4);
	buffer[38] = ' ';
	memcpy(buffer + 39,name + 36,2);
	syn_normalize(NULL,buffer,sizeof buffer);
*/

	printf("G%.5s%c%.2s %.30s %.4s %.2s           %.11s%c%.19s\n",
	       zip,(addr1[10] % 2) ? 'O' : 'E',
	       name,name + 2,name + 32,name + 36,
	       addr1,side,geo1);

	printf("G%.5s%c%.2s %.30s %.4s %.2s           %.11s%c%.19s\n",
	       zip,(addr2[10] % 2) ? 'O' : 'E',
	       name,name + 2,name + 32,name + 36,
	       addr2,'X',geo2);

/*
	printf("G%.5s%c%.40s%.11s%c%.19s\n",
	       zip,(addr1[10] % 2) ? 'O' : 'E',buffer,addr1,side,geo1);

	printf("G%.5s%c%.40s%.11s%c%.19s\n",
	       zip,(addr2[10] % 2) ? 'O' : 'E',buffer,addr2,'X',geo2);
*/
}

static void print(const char *addr,const char *geo,const char *name) {
	if (' ' != addr[10] && ' ' != addr[21] && ' ' != addr[52])
		print_side('L',addr,addr + 48,geo,name);
	if (' ' != addr[32] && ' ' != addr[43] && ' ' != addr[57])
		print_side('R',addr + 22,addr + 53,geo,name);
}

int main(int argc,const char *argv[]) {
	char line[256],last = '\0';

	if (argc != 1) {
		fprintf(stderr,
			"usage: %s < tiger-data | sort -f > step1-data\n"
			"For each county, concatenate in order: "
			"TGR*.RT6, TGR*.RT5, TGR*.RT4, TGR*.RT1\n"
			"Combine data for many counties, but keep "
			"the file order within each county.\n"
		        ,argv[0]);
		return 2;
	}

	while (NULL != fgets(line,sizeof line,stdin)) {
		int i;
		const char *id;

		switch (line[0]) {
		case '1': /* Complete Chain Basic Data Record */
			if (!is_good(line,228)) continue;
			if ('1' != last) {
				qsort(r6,r6_used,sizeof *r6,r6_compare);
				qsort(r4,r4_used,sizeof *r4,r4_compare);
			}

			id = line + 5;
			SEARCH(r4,id,i);
			do {
				const char *name;
				int j;

				if (i < r4_used 
				&& !memcmp(r4[i].id,id,sizeof r4->id)) {
					name = r5[r4[i++].r5id].name;
					--r4_unmatched;
				}
				else {
					name = line + 17;
					i = -1;
				}

				print(line + 58,line + 190,name);
				SEARCH(r6,id,j);
				while (j < r6_used 
				   && !memcmp(r6[j].id,id,sizeof r6->id)) {
					if (i < 0) --r6_unmatched;
					print(r6[j++].addr,line + 190,name);
				}
			}
			while (i >= 0);

			break;

		case '4': /* Index to Alternate Feature Identifiers */
			if (!is_good(line,58)) continue;

			if ('4' != last) {
				if (0 != r4_unmatched)
					fprintf(stderr,"warning: %d unmatched '4' records unused; bad order?\n",r4_unmatched);
				r4_unmatched = r4_used = 0;
				qsort(r5,r5_used,sizeof *r5,r5_compare);
			}

			id = line + 5;
			for (i = 0; i < 5; ++i) {
				const char * const feat = line + 18 + 8*i;
				int r5id;
				if (feat[7] == ' ') break;
				SEARCH(r5,feat,r5id);
				if (r5id >= r5_used 
				||  memcmp(r5[r5id].id,feat,sizeof r5->id)) {
					fprintf(stderr,"warning: '4' %.*s to missing '5' %.*s; bad order?\n",(int) sizeof r4->id,id,(int) sizeof r5->id,feat);
					continue;
				}

				RESERVE(r4);
				memcpy(r4[r4_used].id,id,sizeof r4->id);
				r4[r4_used].r5id = r5id;
				++r4_used;
				++r4_unmatched;
			}
			break;

		case '5': /* Complete Chain Feature Identifiers */
			if (strlen(line) < 56) {
				fputs("warning: invalid '5' record\n",stderr);
				continue;
			}

			id = line + 10;
			if ('5' != last) {
				if (0 != r4_unmatched) {
					fprintf(stderr,"warning: %d unmatched '4' records left over; bad order?\n",r4_unmatched);
					r4_unmatched = r4_used = 0;
				}
				r5_used = 0;
			}

			RESERVE(r5);
			memcpy(r5[r5_used].id,id,sizeof r5->id);
			memcpy(r5[r5_used].name,line + 18,sizeof r5->name);
			++r5_used;
			break;

		case '6': /* Additional Address Range and ZIP Code Data */
			if (!is_good(line,76)) continue;
			id = line + 5;

			if ('6' != last) {
				if (0 != r6_unmatched)
					fprintf(stderr,"warning: %d unmatched '6' records left over; bad order?\n",r6_unmatched);
				r6_unmatched = r6_used = 0;
			}

			RESERVE(r6);
			memcpy(r6[r6_used].id,id,sizeof r6->id);
			memcpy(r6[r6_used].addr,line + 18,sizeof r6->addr);
			++r6_used;
			++r6_unmatched;
			break;

		case '\n':
			fputs("warning: unexpected empty line\n",stderr);
			continue;

		default:
			fprintf(stderr,"warning: ignoring record type '%c'\n",
			        line[0]);
			continue;
		}

		last = line[0];
	}

	if (0 != r6_unmatched)
		fprintf(stderr,"warning: %d unmatched '6' records remain; bad order?\n",r6_unmatched);

	return 0;
}

