/* Copyright 2002 Daniel Egnor.  See LICENSE file.
 *
 * This code is the client interface to document indexes.  It's all
 * pretty straightforward inverted-index processing (I bet you have a
 * class structure not unlike this one somewhere in Google's guts...),
 * but the GeoCursor class is a little different. */

#include "idx.h"

extern "C" {
#include "io.h"
#include "geodesy.h"
}

#include <iostream>
#include <algorithm>
#include <vector>

#include <stdlib.h>
#include <math.h>

using namespace std;

int ZoneCursor::Get(int at_least) {
  int first, next = pos_;
  /* PERF: should use binary search... */
  do {
    pos_ = next;
    if (pos_ < 0 || pos_ >= end_) return -1;
    next = io_in_i4(index_, pos_, &first);
  } while (first < at_least || next < 0);
  return first;
}

int AndCursor::Get(int at_least) {
  vector<Cursor *>::const_iterator source = terms_.end();
  vector<Cursor *>::const_iterator ptr = terms_.begin();
  if (ptr == source) return -1;
  do {
    if (NULL == *ptr) return -1;
    const int first = (*ptr)->Get(at_least);
    if (first < 0) return -1;
    if (first > at_least) {
      source = ptr;
      at_least = first;
    }
    if (++ptr == terms_.end()) ptr = terms_.begin();
  } while (ptr != source);
  return at_least;
}

int OrCursor::Get(int at_least) {
  int best = -1;
  vector<Cursor *>::const_iterator ptr;
  for (ptr = terms_.begin(); ptr != terms_.end(); ++ptr) {
    if (NULL == *ptr) continue;
    const int first = (*ptr)->Get(at_least);
    if (first > 0 && (first < best || -1 == best)) best = first;
  }
  return best;
}

TermCursor::TermCursor(io_file *index, const char t[])
  : index_(index) {
  int term, end;
  if (io_in_i4(index,
      io_in_i4(index,
      io_in_i4(index, 0, NULL), &term), &end) < 0)
    term_ = NULL;
  else
    term_ = Find(term, end, t);
}

TermCursor::~TermCursor() {
  delete term_;
}

int TermCursor::Get(int at_least) {
  if (NULL == term_) return -1;
  return term_->Get(at_least);
}

Cursor *TermCursor::Find(int begin, int end, const char t[]) {
  const int size = 13;
  const int count = (end - begin) / size;
  if (count <= 0) return NULL;

  char test[9];
  if (1 == count) {
    pair<int, int> r;
    const int pos = io_in(index_, begin, test, sizeof test);
    if (pos < 0
    ||  strncasecmp(t, test, sizeof test)
    ||  io_in_i4(index_, 
        io_in(index_, 
        io_in_i4(index_, pos, &r.first), NULL, sizeof test), &r.second) < 0) {
      return NULL;
    }
    return new ZoneCursor(index_, r);
  }

  const int mid = begin + size * (count / 2);
  if (io_in(index_, mid - size, test, sizeof test) < 0)
    return NULL;
  if (strncasecmp(t, test, sizeof test) > 0)
    return Find(mid, end, t);
  else
    return Find(begin, mid, t);
}

/* The constructor for GeoCursor does most of the work; it finds the
 * right region codes to make an approximate match to the area of
 * interest, and constructs an OrCursor object that will return those
 * results.  Upon iteration the results will be filtered more closely. */
GeoCursor::GeoCursor(io_file *index, double lon, double lat, double radius)
  : index_(index), latitude_(lat), longitude_(lon), radius_(radius) {
  const double center_north = geo_mercator_northing(latitude_);
  const double center_east = geo_mercator_easting(longitude_);
  int east_code = 0, north_code = 0, exponent;
  for (exponent = 16; exponent >= 1; --exponent) {
    const int divisor = 1 << exponent;
    const double east = floor(center_east * divisor);
    const double north = floor(center_north * divisor);
    const double west_long = geo_mercator_longitude((east - 1) / divisor);
    const double east_long = geo_mercator_longitude((east + 2) / divisor);
    const double south_lat = geo_mercator_latitude((north - 1) / divisor);
    const double north_lat = geo_mercator_latitude((north + 2) / divisor);
    if (west_long < east_long && south_lat < north_lat
    &&  geo_distance(south_lat, longitude_, latitude_, longitude_) > radius_
    &&  geo_distance(north_lat, longitude_, latitude_, longitude_) > radius_
    &&  geo_distance(latitude_, east_long, latitude_, longitude_) > radius_
    &&  geo_distance(latitude_, west_long, latitude_, longitude_) > radius_) {
      east_code = abs((int) east);
      north_code = abs((int) north);
// cerr << "geo: adding " << exponent << "/" << east_code << "/" << north_code << endl;
// cerr << "geo: bounds " << south_lat << ", " << east_long << " - " << north_lat << ", " << west_long << endl;
      break;
    }
  }

  int region, term;
  if (exponent >= 1
  &&  io_in_i4(index_, io_in_i4(index_, 0, &region), &term) >= 0) {
    for (int e = east_code - 1; e <= east_code + 1; ++e)
      for (int n = north_code - 1; n <= north_code + 1; ++n)
        regions_.push_back(Find(region, term, exponent, e, n));
    approx_ = new OrCursor(regions_.begin(), regions_.end());
  } else
    approx_ = Find(region, term, 0, 0, 0);
}

/* Delete all the crap we built. */
GeoCursor::~GeoCursor() {
  delete approx_;
  vector<Cursor *>::const_iterator i;
  for (i = regions_.begin(); i != regions_.end(); ++i) delete *i;
}

/* Get a result, but make sure it actually fits in the region.
 * Remember, the approximate region matching is just that... approximate.
 * It's a conservative estimate; we have to do the actual distance
 * calculation here. */
int GeoCursor::Get(int at_least) {
  if (NULL == approx_) return -1;
  for (;;) {
    at_least = approx_->Get(at_least); 
    if (at_least < 0) return -1;

    Document doc(index_, at_least);
    for (int i = 0; i < doc.size(); ++i)
      if (geo_distance(latitude_, longitude_,
                       doc[i].Latitude(), doc[i].Longitude()) <= radius_)
        return at_least;

    ++at_least;
  }
}

/* This private method locates the zone associated with a region code and
 * returns a ZoneCursor object (or NULL). */
Cursor *GeoCursor::Find(int begin, int end, signed char exponent, int east, int north) {
  const int size = 13;
  const int count = (end - begin) / size;
  if (count <= 0) return NULL;

  signed char test_exp;
  int test_east, test_north;
  if (1 == count) {
    pair<int, int> r;
    const int pos = io_in_i4(index_, io_in_i4(index_, io_in_i1(index_, begin,
                    &test_exp), &test_east), &test_north);
    if (pos < 0
    ||  test_exp != exponent || test_east != east || test_north != north
    ||  io_in_i4(index_,
        io_in(index_,
        io_in_i4(index_, pos, &r.first), NULL, size - 4), &r.second) < 0)
      return NULL;
    return new ZoneCursor(index_, r);
  }

  const int mid = begin + size * (count / 2);
  if (io_in_i4(index_, 
      io_in_i4(index_, 
      io_in_i1(index_, mid - size, &test_exp), &test_east), &test_north) < 0)
    return NULL;

  if (make_pair(exponent, make_pair(east, north))
   >  make_pair(test_exp, make_pair(test_east, test_north)))
    return Find(mid, end, exponent, east, north);
  else
    return Find(begin, mid, exponent, east, north);
}

/* Read the vital statistics about an address from the index. */
Location::Location(io_file *index, int loc)
  : index_(index) {
  if (io_in_i4(index_,
      io_in_i4(index_,
      io_in_i4(index_,
      io_in_i4(index_,
      io_in_i4(index_,
      io_in_i4(index_, loc, &long_), &lat_), &begin_), NULL), NULL), &end_) < 0)
    begin_ = end_ = long_ = lat_ = 0;
}

string Location::Address() const {
  string str;
  if (begin_ != end_) {
    str.resize(end_ - begin_);
    if (io_in(index_, begin_, &str[0], str.size()) < 0)
      str.erase();
  }
  return str;
}

/* Read the vital statistics about a document from the index. */
Document::Document(io_file *index, int doc)
  : index_(index) {
  if (io_in_i4(index_,
      io_in_i4(index_,
      io_in_i4(index_,
      io_in_i4(index_, doc, &l_begin_), &u_begin_), &l_end_), &u_end_) < 0)
    l_begin_ = l_end_ = u_begin_ = u_end_ = 0;
}

string Document::URL() const {
  if (url_.empty()) GetName();
  return url_;
}

string Document::Title() const {
  if (title_.empty()) GetName();
  return title_;
}

void Document::GetName() const {
  string str;
  if (u_begin_ != u_end_) {
    str.resize(u_end_ - u_begin_);
    if (io_in(index_, u_begin_, &str[0], str.size()) < 0)
      str.erase();
  }

  string::size_type pos = str.find('|');
  url_ = str.substr(0, pos);
  if (pos != string::npos) title_ = str.substr(pos + 1);
}

Location Document::operator[](int i) const {
  return Location(index_, l_begin_ + l_size_ * i);
}

