//***************************************************************************
// This source code is copyrighted 2002 by Google Inc.  All rights
// reserved.  You are given a limited license to use this source code for
// purposes of participating in the Google programming contest.  If you
// choose to use or distribute the source code for any other purpose, you
// must either (1) first obtain written approval from Google, or (2)
// prominently display the foregoing copyright notice and the following
// warranty and liability disclaimer on each copy used or distributed.
// 
// The source code and repository (the "Software") is provided "AS IS",
// with no warranty, express or implied, including but not limited to the
// implied warranties of merchantability and fitness for a particular
// use.  In no event shall Google Inc. be liable for any damages, direct
// or indirect, even if advised of the possibility of such damages.
//***************************************************************************


// The Ripper iterates over one or more repository files, calling
// handler methods that act on each document in turn. Handlers are
// instances of a ParseHandler sub-class. See files
// parsehandler-caturl.cc and parsehandler-preparsecat.cc for examples
// of ParseHandler sub-classes.  To add a new handler to the ripper,
// look at the sections of this file labelled "*NEW HANDLERS*" and
// make the indicated modifications. Handlers may take arguments
// supplied on the ripper command line.
//
// Note: this is the public ripper, which operates on pre-parsed repositories.

#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "goo-handler-parser.h"
#include "goo-repos-reader.h"
#include "goo-parseelt.h"

// *NEW HANDLERS* - add includes here if needed



static void error(string errmsg) { cerr << errmsg << std::endl; exit(1); }

static void usage() {
  error("usage: ripper [--stop_after n] <handler options> "
        "{- | <repository files>}");
}

// Running count of documents handled so far. Ripper::RipRepository()
// increments it after each document and compares it against
// flags_.stop_after; because it is a global, the count carries over from
// one repository file to the next.
int num_docs_processed = 0;

// Driver object for one run of the ripper. It holds the parsed
// command-line settings, the list of repository files to read, and the
// parse handlers that are invoked for each document.
//
// Typical use (see main()): ParseCmdLineArgs(), then SetupHandlers(), then
// RipRepository() once per input source.
class Ripper {
 public:
  // Sets every flag to its "off"/zero default.
  Ripper();

  // Deletes every handler registered by SetupHandlers().
  ~Ripper();

  // Creates one handler for each handler_* flag that is set and appends it
  // to the handler list. Calls usage() (which exits) if no handler was
  // requested.
  void SetupHandlers();

  // Fills in flags_ and rep_files_ from the command line. Calls usage()
  // (which exits) on malformed input.
  void ParseCmdLineArgs(int argc, char** argv);

  // Processes documents from rr until the reader is at its end or the
  // --stop_after limit has been reached.
  void RipRepository (ReposReader* rr);

  // Repository file names given on the command line, in order.
  vector<string> rep_files_;

  // *NEW HANDLERS* - Add command line flags and values here
  struct {
    int stop_after;    // if non-zero, stop processing after this many docs
    bool repos_from_stdin;  // true when "-" was given as the input source
    bool handler_cat;  // simple handler to "cat" repository
    bool handler_caturl; // even simpler handler to "cat" just urls
    bool handler_geocode; // handler to index terms and locations
    int geocode_batch; // index batch number to use in output
    const char *geocode_index; // TIGER index filename to use
  } flags_;

 private:
  // Not copyable: the destructor deletes the handlers in parsehandlers_,
  // so a copy would lead to the same pointers being deleted twice. The
  // copy operations are declared private and intentionally left undefined.
  Ripper(const Ripper&);
  Ripper& operator=(const Ripper&);

  // list of parse-handlers to call for each document (owned by this object)
  vector<ParseHandler*> parsehandlers_; 
};

// Starts with no document limit, no input source chosen and every
// handler switched off; ParseCmdLineArgs() overrides these from argv.
Ripper::Ripper()
    : rep_files_(),
      parsehandlers_() {
  // General options.
  flags_.repos_from_stdin = false;
  flags_.stop_after = 0;

  // *NEW HANDLERS* - Initialize command line flags and values here
  flags_.handler_caturl = false;
  flags_.handler_cat = false;

  // Geocode handler and its two arguments.
  flags_.handler_geocode = false;
  flags_.geocode_batch = 0;
  flags_.geocode_index = NULL;
}

// Releases the handlers that SetupHandlers() created, in the order in
// which they were registered.
Ripper::~Ripper() {
  typedef vector<ParseHandler*>::size_type Index;
  for (Index slot = 0; slot < parsehandlers_.size(); ++slot) {
    delete parsehandlers_[slot];
  }
}

void Ripper::SetupHandlers () {

  if (flags_.handler_cat) {
    extern ParseHandler* MakeCatHandler();
    parsehandlers_.push_back(MakeCatHandler());
  }

  if (flags_.handler_caturl) {
    extern ParseHandler* MakeCatURLHandler();
    parsehandlers_.push_back(MakeCatURLHandler());
  }

  if (flags_.handler_geocode) {
    extern ParseHandler* MakeGeoCodeHandler(const char* index,int batch);
    parsehandlers_.push_back(MakeGeoCodeHandler(flags_.geocode_index,
                                                flags_.geocode_batch));
  }

  // *NEW HANDLERS* - Add handler initialization here, following
  // the handler_cat example above.

  if (parsehandlers_.size() == 0) {
    usage();
  }
}

// Feeds documents from rr to the registered handlers, one
// ParseElt::Process_Document() call per document, until the reader
// reports AtEnd() or the global document count reaches a non-zero
// flags_.stop_after.
void Ripper::RipRepository (ReposReader* rr) {
  for (;;) {
    if (rr->AtEnd()) {
      break;
    }
    // A stop_after of zero means "no limit".
    const bool limit_reached =
        flags_.stop_after != 0 && num_docs_processed >= flags_.stop_after;
    if (limit_reached) {
      break;
    }
    ParseElt::Process_Document(rr, &parsehandlers_);
    ++num_docs_processed;
  }
}

void Ripper::ParseCmdLineArgs(int argc, char** argv) {
  argv++;
  for (int i = 1; i < argc; ++i, ++argv) {
    if ((*argv)[0] == '-' && (*argv)[1] != '\0') {  // option
      if (!strcmp(*argv, "--stop_after")) {
        ++i;
        ++argv;
        if (i >= argc) {
          usage();
        }
        flags_.stop_after = atoi(*argv);
      } else if (!strcmp(*argv, "--cat")) {
        flags_.handler_cat = true;
      } else if (!strcmp(*argv, "--caturl")) {
        flags_.handler_caturl = true;
      } else if (!strcmp(*argv, "--geocode")) {
        if (i + 2 >= argc) {
          cerr << "error: --geocode requires map file and batch id" << endl;
          usage();
        }
        flags_.handler_geocode = true;
        flags_.geocode_index = argv[1];
        flags_.geocode_batch = atoi(argv[2]);
        i += 2;
        argv += 2;
      }
      // *NEW HANDLERS* - add command line processing here

      else usage();
    } else { // repository file
      if ((*argv)[0] == '-' && (*argv)[1] == '\0') {
        flags_.repos_from_stdin = true;
      } else {
        rep_files_.push_back(string(*argv));
      }
    }
  }
  if (!flags_.repos_from_stdin && rep_files_.empty()) {
    usage();
  }
  if (flags_.repos_from_stdin && !rep_files_.empty()) {
    cerr << "Specify only one source of repository input (files or stdin)"
         << std::endl;
    usage();
  }
}



int main(int argc, char** argv) {
  Ripper ripper;

  cerr << "Welcome to the Google Programming Contest ripper." << std::endl
       << "Please see the file LICENSE for terms of use of "
       << "the data and code." << std::endl;

  ripper.ParseCmdLineArgs(argc, argv);
  ripper.SetupHandlers();
  if (ripper.flags_.repos_from_stdin) {
    ReposReader reprdr(&cin, string("<stdin>"));
    ripper.RipRepository(&reprdr);
  } else {
    for (vector<string>::iterator repname = ripper.rep_files_.begin();
         repname != ripper.rep_files_.end(); ++repname) {
      std::ifstream repstream((*repname).c_str());
      if (! repstream) {
        cerr << "Cannot open repository file " << *repname 
             << ", skipping it" << std::endl;
      } else {
        ReposReader reprdr(&repstream, *repname);
        ripper.RipRepository(&reprdr);
      }
      repstream.close();
    }
  }
  return 0;
}

