//***************************************************************************
// This source code is copyrighted 2002 by Google Inc.  All rights
// reserved.  You are given a limited license to use this source code for
// purposes of participating in the Google programming contest.  If you
// choose to use or distribute the source code for any other purpose, you
// must either (1) first obtain written approval from Google, or (2)
// prominently display the foregoing copyright notice and the following
// warranty and liability disclaimer on each copy used or distributed.
// 
// The source code and repository (the "Software") is provided "AS IS",
// with no warranty, express or implied, including but not limited to the
// implied warranties of merchantability and fitness for a particular
// use.  In no event shall Google Inc. be liable for any damages, direct
// or indirect, even if advised of the possibility of such damages.
//***************************************************************************


#include <stdio.h>
#include <vector>
#include "goo-parseelt.h"
#include "goo-parsehandler-preparse.h"


// Invokes the zero-argument method `method` on every ParseHandler* in the
// vector pointed to by the in-scope variable `ph`.
// The body is wrapped in do { } while (0) so the expansion is exactly one
// statement (safe inside an unbraced if/else); the caller supplies the
// trailing semicolon.
#define CALL0(method)                                                     \
  do {                                                                    \
    for ( vector<ParseHandler*>::iterator cur_ph = ph->begin();           \
          cur_ph != ph->end(); ++cur_ph )                                 \
      (*cur_ph)->method();                                                \
  } while (0)

// Invokes the one-argument method `method(arg)` on every ParseHandler* in the
// vector pointed to by the in-scope variable `ph`.  `arg` is re-evaluated for
// each handler.
// The body is wrapped in do { } while (0) so the expansion is exactly one
// statement (safe inside an unbraced if/else); the caller supplies the
// trailing semicolon.
#define CALL1(method,arg)                                                 \
  do {                                                                    \
    for ( vector<ParseHandler*>::iterator cur_ph = ph->begin();           \
          cur_ph != ph->end(); ++cur_ph )                                 \
      (*cur_ph)->method(arg);                                             \
  } while (0)

// Invokes the two-argument method `method(arg1, arg2)` on every ParseHandler*
// in the vector pointed to by the in-scope variable `ph`.  The arguments are
// re-evaluated for each handler.
// The body is wrapped in do { } while (0) so the expansion is exactly one
// statement (safe inside an unbraced if/else); the caller supplies the
// trailing semicolon.
#define CALL2(method,arg1,arg2)                                           \
  do {                                                                    \
    for ( vector<ParseHandler*>::iterator cur_ph = ph->begin();           \
          cur_ph != ph->end(); ++cur_ph )                                 \
      (*cur_ph)->method(arg1, arg2);                                      \
  } while (0)

// Invokes the four-argument method `method(arg1, arg2, arg3, arg4)` on every
// ParseHandler* in the vector pointed to by the in-scope variable `ph`.  The
// arguments are re-evaluated for each handler.
// The body is wrapped in do { } while (0) so the expansion is exactly one
// statement (safe inside an unbraced if/else); the caller supplies the
// trailing semicolon.
#define CALL4(method,arg1,arg2,arg3,arg4)                                 \
  do {                                                                    \
    for ( vector<ParseHandler*>::iterator cur_ph = ph->begin();           \
          cur_ph != ph->end(); ++cur_ph )                                 \
      (*cur_ph)->method(arg1,arg2,arg3,arg4);                             \
  } while (0)

// Formats the integer i using the printf-style conversion in `format` (which
// must consume exactly one int argument, e.g. "%d" or "%2x") and returns the
// resulting text.
//
// Returns an empty string if snprintf reports an error.  Output that does not
// fit in the initial stack buffer is formatted again into a buffer of the
// required size rather than being silently truncated.
std::string IntToString(int i, const char* format) {
  char buf[80];
  const int needed = snprintf(buf, sizeof(buf), format, i);
  if (needed < 0) {
    // Formatting failed; buf's contents are unspecified, so do not read it.
    return std::string();
  }
  if (static_cast<size_t>(needed) < sizeof(buf)) {
    return std::string(buf);
  }
  // snprintf returned the length the full output requires; retry with room
  // for that many characters plus the terminating NUL.
  std::string big(static_cast<size_t>(needed) + 1, '\0');
  snprintf(&big[0], big.size(), format, i);
  return std::string(big.c_str());
}

  
// Dispatch table used by Process_Document(): entry i is the function invoked
// for parse code i.
// This table is indexed by non-term, non-punctuation parse codes (terms and
// punctuation are routed to Process_TermOrPunct() instead).
// It will need to be updated if the parse code values are changed!
// NULL entries have no handler and must never be dispatched to.
const ParseElt::ParseEltMethod ParseElt::kParseCodeTable[] = {
  NULL,  // parse code 0 is not used
  &ParseElt::Process_Header,  // kParseElt_Header
  &ParseElt::Process_ResponseCode,  // kParseElt_ResponseCode
  &ParseElt::Process_BaseURL,  // kParseElt_BaseURL
  &ParseElt::Process_Anchor,  // kParseElt_Anchor
  &ParseElt::Process_LocalName,  // kParseElt_LocalName
  &ParseElt::Process_AnchorDone,  // kParseElt_AnchorDone
  &ParseElt::Process_ChangeFontColor,  // kParseElt_ChangeFontColor
  &ParseElt::Process_ChangeFontColorEnd,  // kParseElt_ChangeFontColorEnd
  &ParseElt::Process_ChangeBGColor,  // kParseElt_ChangeBGColor
  &ParseElt::Process_ChangeBGColorEnd,  // kParseElt_ChangeBGColorEnd
  &ParseElt::Process_Image,  // kParseElt_Image
  NULL,  // no handler; was: &ParseElt::Process_ImageHeight
  NULL,  // no handler; was: &ParseElt::Process_ImageWidth
  &ParseElt::Process_Applet,  // kParseElt_Applet
  &ParseElt::Process_AppletDone,  // kParseElt_AppletDone
  &ParseElt::Process_IFrame,  // kParseElt_IFrame
  &ParseElt::Process_IFrameDone,  // kParseElt_IFrameDone
  &ParseElt::Process_Frame,  // kParseElt_Frame
  &ParseElt::Process_Area,  // kParseElt_Area
  &ParseElt::Process_Meta,  // kParseElt_Meta
  &ParseElt::Process_Frameset,  // kParseElt_Frameset
  &ParseElt::Process_FramesetDone,  // kParseElt_FramesetDone
  &ParseElt::Process_Body,  // kParseElt_Body
  &ParseElt::Process_BodyDone,  // kParseElt_BodyDone
  &ParseElt::Process_ParagraphStart,  // kParseElt_ParagraphStart
  &ParseElt::Process_ParagraphEnd,  // kParseElt_ParagraphEnd
  &ParseElt::Process_Break,  // kParseElt_Break
  &ParseElt::Process_HorizontalRule,  // kParseElt_HorizontalRule
  &ParseElt::Process_ListItem,  // kParseElt_ListItem
  &ParseElt::Process_UnorderedList,  // kParseElt_UnorderedList
  &ParseElt::Process_OrderedList,  // kParseElt_OrderedList
  &ParseElt::Process_ListDone,  // kParseElt_ListDone
  &ParseElt::Process_Div,  // kParseElt_Div
  &ParseElt::Process_DivDone,  // kParseElt_DivDone
  &ParseElt::Process_Span,  // kParseElt_Span
  &ParseElt::Process_SpanDone,  // kParseElt_SpanDone
  &ParseElt::Process_Table,  // kParseElt_Table
  &ParseElt::Process_TableDone,  // kParseElt_TableDone
  &ParseElt::Process_Caption,  // kParseElt_Caption
  &ParseElt::Process_CaptionDone,  // kParseElt_CaptionDone
  &ParseElt::Process_TableHCell,  // kParseElt_TableHCell
  &ParseElt::Process_TableDCell,  // kParseElt_TableDCell
  &ParseElt::Process_TableCellDone,  // kParseElt_TableCellDone
  &ParseElt::Process_TableRow,  // kParseElt_TableRow
  &ParseElt::Process_TableRowDone,  // kParseElt_TableRowDone
  &ParseElt::Process_Form,  // kParseElt_Form
  &ParseElt::Process_FormDone,  // kParseElt_FormDone
  &ParseElt::Process_Select,  // kParseElt_Select
  &ParseElt::Process_SelectDone,  // kParseElt_SelectDone
  &ParseElt::Process_Option,  // kParseElt_Option
  &ParseElt::Process_OptionDone,  // kParseElt_OptionDone
  &ParseElt::Process_TextArea,  // kParseElt_TextArea
  &ParseElt::Process_TextAreaDone,  // kParseElt_TextAreaDone
  &ParseElt::Process_Input,  // kParseElt_Input
  &ParseElt::Process_Heading,  // kParseElt_Heading
  &ParseElt::Process_HeadingDone,  // kParseElt_HeadingDone
  &ParseElt::Process_Noframes,  // kParseElt_Noframes
  &ParseElt::Process_NoframesDone,  // kParseElt_NoframesDone
  &ParseElt::Process_Object,  // kParseElt_Object
  &ParseElt::Process_ObjectDone,  // kParseElt_ObjectDone
  &ParseElt::Process_Param,  // kParseElt_Param
  &ParseElt::Process_Embed,  // kParseElt_Embed
  &ParseElt::Process_Head,  // kParseElt_Head
  &ParseElt::Process_HeadDone,  // kParseElt_HeadDone
  &ParseElt::Process_SetFace, // kParseElt_SetFace
  &ParseElt::Process_SetSize  // kParseElt_SetSize
};

// Definitions of ParseElt's static data members.

// Scratch buffer; Process_Header() copies the header key into it.
char ParseElt::cbuf1_[kTextBufSize * 10];
// Second scratch buffer; not referenced by any function in this file.
char ParseElt::cbuf2_[kTextBufSize];
// Last value read by Process_SetFace(); passed with every term and
// punctuation token in Process_TermOrPunct().
int ParseElt::cur_face_ = 0;
// Last value read by Process_SetSize(); passed with every term and
// punctuation token in Process_TermOrPunct().
int ParseElt::cur_size_ = 0;
// Document record reused across documents: Process_Document() clears it,
// fills it from the document header, and hands its address to the handlers.
Document ParseElt::doc_ = Document();

// Copies the first len bytes of src into dst and NUL-terminates the result.
// Returns false, leaving dst untouched, when len is negative or when len
// bytes plus the terminator would not fit in dst_size bytes.
static bool CopyDocField(char* dst, size_t dst_size, const char* src, int len) {
  if (len < 0 || static_cast<size_t>(len) >= dst_size) {
    return false;
  }
  strncpy(dst, src, len);
  dst[len] = '\0';
  return true;
}

// Reads one complete document from rr and reports it to every handler in ph.
//
// The document header (URL, URL after redirects, IP address, content type,
// content length, language, encoding) is read into doc_, and NewDocument() is
// called on each handler.  Parse codes are then read one byte at a time until
// kEndDocMarker: terms and punctuation go to Process_TermOrPunct(), every
// other code is dispatched through kParseCodeTable.  EndDocument() is called
// on each handler at the end.
//
// Malformed input (a URL that does not fit the local buffer, a missing
// BeginDoc marker, or a parse code with no handler) is reported through
// rr->ParseError().
void ParseElt::Process_Document(ReposReader* rr, HandlerVec ph) {
  char urlbuf[1024];
  char reurlbuf[1024];
  int len;

  doc_.Clear();

  // Process document header
  const char* p = rr->ReadCharStar(&len);
  // The length is validated as a signed value at run time: the previous
  // assert compared an unsigned difference against zero, so an over-long URL
  // wrapped around, passed the check and overflowed the stack buffer.
  if (!CopyDocField(urlbuf, sizeof(urlbuf), p, len)) {
    rr->ParseError("document URL does not fit in buffer");
    urlbuf[0] = '\0';
  }
  doc_.set_url(urlbuf);

  p = rr->ReadCharStar(&len);
  if (!CopyDocField(reurlbuf, sizeof(reurlbuf), p, len)) {
    rr->ParseError("document redirect URL does not fit in buffer");
    reurlbuf[0] = '\0';
  }
  doc_.set_url_after_redirects(reurlbuf);

  struct in_addr ip_addr;
  ip_addr.s_addr = rr->ReadFixedUint32();
  doc_.set_ip_addr(ip_addr);

  doc_.set_content_type(ContentType(rr->ReadInt()));
  doc_.set_content_len(rr->ReadVarUint32());
  doc_.set_language(Language(rr->ReadInt()));
  doc_.set_encoding(Encoding(rr->ReadInt()));

  CALL1(NewDocument, &doc_);

  // Number of entries actually present in the dispatch table; checked in
  // addition to kParseElt_LASTCODE in case the two ever get out of step.
  const size_t table_size =
      sizeof(kParseCodeTable) / sizeof(kParseCodeTable[0]);

  unsigned char pc;
  if ((pc = rr->ReadByte()) != kBeginDocMarker) {
    rr->ParseError("expecting BeginDoc code");
  }
  while ((pc = rr->ReadByte()) != kEndDocMarker) {
    if (IS_TERM_OR_PUNCT(pc)) {  // handle terms and punctuation specially
      Process_TermOrPunct(pc, rr, ph);
      continue;
    }
    if (pc > kParseElt_LASTCODE || pc >= table_size ||
        kParseCodeTable[pc] == NULL) {
      rr->ParseError("found invalid parse code 0x"
                     + IntToString(static_cast<int>(pc), "%2x"));
      // If ParseError() returns, stop here rather than index past the table
      // or call through a NULL entry.
      break;
    }
    // dispatch to appropriate routine to read args and call handler method
    (*kParseCodeTable[pc])(pc, rr, ph);
  }
  CALL1(EndDocument, &doc_);
}


void  ParseElt::Process_Header(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Header);

  int keylen; 
  int valuelen;

  const char* p = rr->ReadCharStar(&keylen);
  assert(sizeof(cbuf1_) - keylen >= 0);
  strncpy(cbuf1_, p, keylen);

  const char* value = rr->ReadCharStar(&valuelen);

  CALL4(AddHeader, cbuf1_, keylen, value, valuelen);  
}


// Handles kParseElt_ResponseCode: reads one integer from rr and passes it to
// AddResponseCode() on every handler in ph.
void ParseElt::Process_ResponseCode(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_ResponseCode);

  const int code = rr->ReadInt();
  CALL1(AddResponseCode, code);
}

// Handles kParseElt_BaseURL: reads one string (pointer plus length) from rr
// and passes it to AddBaseURL() on every handler in ph.
void ParseElt::Process_BaseURL(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_BaseURL);
  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddBaseURL, data, data_len);
}

// Handles kParseElt_Anchor: reads one string (pointer plus length) from rr
// and passes it to AddAnchor() on every handler in ph.
void ParseElt::Process_Anchor(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Anchor);
  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddAnchor, data, data_len);
}

// Handles kParseElt_LocalName: reads one string (pointer plus length) from rr
// and passes it to AddLocalName() on every handler in ph.
void ParseElt::Process_LocalName(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_LocalName);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddLocalName, data, data_len);
}

// Handles kParseElt_AnchorDone: reads nothing further from rr and calls
// AnchorDone() on every handler in ph.
void ParseElt::Process_AnchorDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_AnchorDone);
  CALL0(AnchorDone);
}

// Handles kParseElt_ChangeFontColor: reads one string (pointer plus length)
// from rr and passes it to ChangeFontColor() on every handler in ph.
void ParseElt::Process_ChangeFontColor(int pc, ReposReader* rr, 
                                       HandlerVec ph) {
  assert(pc == kParseElt_ChangeFontColor);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(ChangeFontColor, data, data_len);
}

// Handles kParseElt_ChangeFontColorEnd: reads nothing further from rr and
// calls ChangeFontColorEnd() on every handler in ph.
void ParseElt::Process_ChangeFontColorEnd(int pc, ReposReader* rr, 
                                          HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_ChangeFontColorEnd);
  CALL0(ChangeFontColorEnd);
}

// Handles kParseElt_ChangeBGColor: reads one string (pointer plus length)
// from rr and passes it to ChangeBGColor() on every handler in ph.
void ParseElt::Process_ChangeBGColor(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_ChangeBGColor);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(ChangeBGColor, data, data_len);
}

// Handles kParseElt_ChangeBGColorEnd: reads nothing further from rr and
// calls ChangeBGColorEnd() on every handler in ph.
void ParseElt::Process_ChangeBGColorEnd(int pc, ReposReader* rr, 
                                        HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_ChangeBGColorEnd);
  CALL0(ChangeBGColorEnd);
}

// Handles kParseElt_Image: reads one string (pointer plus length) from rr
// and passes it to AddImage() on every handler in ph.
void ParseElt::Process_Image(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Image);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddImage, data, data_len);
}

// Handles kParseElt_Applet: reads one string (pointer plus length) from rr
// and passes it to AddApplet() on every handler in ph.
void ParseElt::Process_Applet(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Applet);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddApplet, data, data_len);
}

// Handles kParseElt_AppletDone: reads nothing further from rr and calls
// AddAppletDone() on every handler in ph.
void ParseElt::Process_AppletDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_AppletDone);
  CALL0(AddAppletDone);
}

// Handles kParseElt_IFrame: reads one string (pointer plus length) from rr
// and passes it to AddIFrame() on every handler in ph.
void ParseElt::Process_IFrame(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_IFrame);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddIFrame, data, data_len);
}

// Handles kParseElt_IFrameDone: reads nothing further from rr and calls
// AddIFrameDone() on every handler in ph.
void ParseElt::Process_IFrameDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_IFrameDone);
  CALL0(AddIFrameDone);
}

// Handles kParseElt_Frame: reads one string (pointer plus length) from rr
// and passes it to AddFrame() on every handler in ph.
void ParseElt::Process_Frame(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Frame);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddFrame, data, data_len);
}

// Handles kParseElt_Area: reads one string (pointer plus length) from rr
// and passes it to AddArea() on every handler in ph.
void ParseElt::Process_Area(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Area);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddArea, data, data_len);
}

// Handles kParseElt_Meta: reads one string (pointer plus length) from rr
// and passes it to AddMeta() on every handler in ph.
void ParseElt::Process_Meta(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Meta);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddMeta, data, data_len);
}

// Handles kParseElt_Frameset: reads one string (pointer plus length) from rr
// and passes it to AddFrameset() on every handler in ph.
void ParseElt::Process_Frameset(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Frameset);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddFrameset, data, data_len);
}

// Handles kParseElt_FramesetDone: reads nothing further from rr and calls
// AddFramesetDone() on every handler in ph.
void ParseElt::Process_FramesetDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_FramesetDone);
  CALL0(AddFramesetDone);
}

// Handles kParseElt_Body: reads one string (pointer plus length) from rr
// and passes it to AddBody() on every handler in ph.
void ParseElt::Process_Body(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Body);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddBody, data, data_len);
}

// Handles kParseElt_BodyDone: reads nothing further from rr and calls
// AddBodyDone() on every handler in ph.
void ParseElt::Process_BodyDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_BodyDone);
  CALL0(AddBodyDone);
}

// Handles kParseElt_ParagraphStart: reads one string (pointer plus length)
// from rr and passes it to ParagraphStart() on every handler in ph.
void ParseElt::Process_ParagraphStart(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_ParagraphStart);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(ParagraphStart, data, data_len);
}

// Handles kParseElt_ParagraphEnd: reads nothing further from rr and calls
// ParagraphEnd() on every handler in ph.
void ParseElt::Process_ParagraphEnd(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_ParagraphEnd);
  CALL0(ParagraphEnd);
}

// Handles kParseElt_Break: reads nothing further from rr and calls
// AddBreak() on every handler in ph.
void ParseElt::Process_Break(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_Break);
  CALL0(AddBreak);
}

// Handles kParseElt_HorizontalRule: reads nothing further from rr and calls
// AddHorizontalRule() on every handler in ph.
void ParseElt::Process_HorizontalRule(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_HorizontalRule);
  CALL0(AddHorizontalRule);
}

// Handles kParseElt_ListItem: reads nothing further from rr and calls
// AddListItem() on every handler in ph.
void ParseElt::Process_ListItem(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_ListItem);
  CALL0(AddListItem);
}

// Handles kParseElt_UnorderedList: reads nothing further from rr and calls
// AddUnorderedList() on every handler in ph.
void ParseElt::Process_UnorderedList(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_UnorderedList);
  CALL0(AddUnorderedList);
}

// Handles kParseElt_OrderedList: reads nothing further from rr and calls
// AddOrderedList() on every handler in ph.
void ParseElt::Process_OrderedList(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_OrderedList);
  CALL0(AddOrderedList);
}

// Handles kParseElt_ListDone: reads nothing further from rr and calls
// AddListDone() on every handler in ph.
void ParseElt::Process_ListDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_ListDone);
  CALL0(AddListDone);
}

// Handles kParseElt_Div: reads one string (pointer plus length) from rr
// and passes it to AddDiv() on every handler in ph.
void ParseElt::Process_Div(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Div);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddDiv, data, data_len);
}

// Handles kParseElt_DivDone: unlike most of the *Done codes in this file,
// this one carries a string; it is read from rr (pointer plus length) and
// passed to AddDivDone() on every handler in ph.
void ParseElt::Process_DivDone(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_DivDone);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddDivDone, data, data_len);
}

// Handles kParseElt_Span: reads one string (pointer plus length) from rr
// and passes it to AddSpan() on every handler in ph.
void ParseElt::Process_Span(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Span);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddSpan, data, data_len);
}

// Handles kParseElt_SpanDone: unlike most of the *Done codes in this file,
// this one carries a string; it is read from rr (pointer plus length) and
// passed to AddSpanDone() on every handler in ph.
void ParseElt::Process_SpanDone(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_SpanDone);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddSpanDone, data, data_len);
}

// Handles kParseElt_Table: reads nothing further from rr and calls
// AddTable() on every handler in ph.
void ParseElt::Process_Table(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_Table);
  CALL0(AddTable);
}

// Handles kParseElt_TableDone: reads nothing further from rr and calls
// AddTableDone() on every handler in ph.
void ParseElt::Process_TableDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_TableDone);
  CALL0(AddTableDone);
}

// Handles kParseElt_Caption: reads nothing further from rr and calls
// AddCaption() on every handler in ph.
void ParseElt::Process_Caption(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_Caption);
  CALL0(AddCaption);
}

// Handles kParseElt_CaptionDone: reads nothing further from rr and calls
// AddCaptionDone() on every handler in ph.
void ParseElt::Process_CaptionDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_CaptionDone);
  CALL0(AddCaptionDone);
}

// Handles kParseElt_TableHCell: reads one string (pointer plus length) from
// rr and passes it to AddTableHCell() on every handler in ph.
void ParseElt::Process_TableHCell(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_TableHCell);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddTableHCell, data, data_len);
}

// Handles kParseElt_TableDCell: reads one string (pointer plus length) from
// rr and passes it to AddTableDCell() on every handler in ph.
void ParseElt::Process_TableDCell(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_TableDCell);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddTableDCell, data, data_len);
}

// Handles kParseElt_TableCellDone: reads nothing further from rr and calls
// AddTableCellDone() on every handler in ph.
void ParseElt::Process_TableCellDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_TableCellDone);
  CALL0(AddTableCellDone);
}

// Handles kParseElt_TableRow: reads nothing further from rr and calls
// AddTableRow() on every handler in ph.
void ParseElt::Process_TableRow(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_TableRow);
  CALL0(AddTableRow);
}

// Handles kParseElt_TableRowDone: reads nothing further from rr and calls
// AddTableRowDone() on every handler in ph.
void ParseElt::Process_TableRowDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_TableRowDone);
  CALL0(AddTableRowDone);
}

// Handles kParseElt_Form: reads one string (pointer plus length) from rr
// and passes it to AddForm() on every handler in ph.
void ParseElt::Process_Form(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Form);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddForm, data, data_len);
}

// Handles kParseElt_FormDone: reads nothing further from rr and calls
// AddFormDone() on every handler in ph.
void ParseElt::Process_FormDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_FormDone);
  CALL0(AddFormDone);
}

// Handles kParseElt_Select: reads one string (pointer plus length) from rr
// and passes it to AddSelect() on every handler in ph.
void ParseElt::Process_Select(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Select);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddSelect, data, data_len);
}

// Handles kParseElt_SelectDone: reads nothing further from rr and calls
// AddSelectDone() on every handler in ph.
void ParseElt::Process_SelectDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_SelectDone);
  CALL0(AddSelectDone);
}

// Handles kParseElt_Option: reads one string (pointer plus length) from rr
// and passes it to AddOption() on every handler in ph.
void ParseElt::Process_Option(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Option);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddOption, data, data_len);
}

// Handles kParseElt_OptionDone: reads nothing further from rr and calls
// AddOptionDone() on every handler in ph.
void ParseElt::Process_OptionDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_OptionDone);
  CALL0(AddOptionDone);
}

// Handles kParseElt_TextArea: reads one string (pointer plus length) from rr
// and passes it to AddTextArea() on every handler in ph.
void ParseElt::Process_TextArea(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_TextArea);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddTextArea, data, data_len);
}

// Handles kParseElt_TextAreaDone: reads nothing further from rr and calls
// AddTextAreaDone() on every handler in ph.
void ParseElt::Process_TextAreaDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_TextAreaDone);
  CALL0(AddTextAreaDone);
}

// Handles kParseElt_Input: reads one string (pointer plus length) from rr
// and passes it to AddInput() on every handler in ph.
void ParseElt::Process_Input(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Input);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddInput, data, data_len);
}

// Handles kParseElt_Heading: reads one integer from rr and passes it to
// AddHeading() on every handler in ph.
void ParseElt::Process_Heading(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Heading);

  const int heading_arg = rr->ReadInt();
  CALL1(AddHeading, heading_arg);
}

// Handles kParseElt_HeadingDone: reads nothing further from rr and calls
// AddHeadingDone() on every handler in ph.
void ParseElt::Process_HeadingDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_HeadingDone);
  CALL0(AddHeadingDone);
}

// Handles kParseElt_Noframes: reads nothing further from rr and calls
// AddNoframes() on every handler in ph.
void ParseElt::Process_Noframes(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_Noframes);
  CALL0(AddNoframes);
}

// Handles kParseElt_NoframesDone: reads nothing further from rr and calls
// AddNoframesDone() on every handler in ph.
void ParseElt::Process_NoframesDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_NoframesDone);
  CALL0(AddNoframesDone);
}

// Handles kParseElt_Object: reads one string (pointer plus length) from rr
// and passes it to AddObject() on every handler in ph.
void ParseElt::Process_Object(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Object);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddObject, data, data_len);
}

// Handles kParseElt_ObjectDone: reads nothing further from rr and calls
// AddObjectDone() on every handler in ph.
void ParseElt::Process_ObjectDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_ObjectDone);
  CALL0(AddObjectDone);
}

// Handles kParseElt_Param: reads one string (pointer plus length) from rr
// and passes it to AddParam() on every handler in ph.
void ParseElt::Process_Param(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Param);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddParam, data, data_len);
}

// Handles kParseElt_Embed: reads one string (pointer plus length) from rr
// and passes it to AddEmbed() on every handler in ph.
void ParseElt::Process_Embed(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Embed);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddEmbed, data, data_len);
}

// Handles kParseElt_Head: reads one string (pointer plus length) from rr
// and passes it to AddHead() on every handler in ph.
void ParseElt::Process_Head(int pc, ReposReader* rr, HandlerVec ph) {
  assert(pc == kParseElt_Head);

  int data_len;
  const char* const data = rr->ReadCharStar(&data_len);
  CALL2(AddHead, data, data_len);
}

// Handles kParseElt_HeadDone: reads nothing further from rr and calls
// AddHeadDone() on every handler in ph.
void ParseElt::Process_HeadDone(int pc, ReposReader* rr, HandlerVec ph) {
  (void)rr;  // this parse code carries no payload
  assert(pc == kParseElt_HeadDone);
  CALL0(AddHeadDone);
}

// Handles kParseElt_SetFace: reads one integer from rr and stores it in
// cur_face_.  No handler in ph is called.
void ParseElt::Process_SetFace(int pc, ReposReader* rr, HandlerVec ph) {
  (void)ph;  // handlers are not called for this parse code
  assert(pc == kParseElt_SetFace);

  const int face = rr->ReadInt();
  cur_face_ = face;
}

// Handles kParseElt_SetSize: reads one integer from rr and stores it in
// cur_size_.  No handler in ph is called.
void ParseElt::Process_SetSize(int pc, ReposReader* rr, HandlerVec ph) {
  (void)ph;  // handlers are not called for this parse code
  assert(pc == kParseElt_SetSize);

  const int size = rr->ReadInt();
  cur_size_ = size;
}

// Handles a term or punctuation parse code.
//
// The text is read from rr in one of two ways: when GET_LENGTH_FOLLOWS(pc)
// holds, ReadCharStar() supplies both the text and its length; otherwise the
// length is taken from pc via GET_LENGTH() and that many characters are read
// with ReadCharsOnly().  The text is passed, together with cur_face_ and
// cur_size_, to AddTerm() (when IS_TERM(pc) holds) or AddPunctuation() on
// every handler in ph.  If GET_WHITESPACE_FOLLOWS(pc) holds,
// WhitespaceEndedTerm() is then called on every handler.
void ParseElt::Process_TermOrPunct(int pc, ReposReader* rr, HandlerVec ph) {
  int text_len;
  const char* text;

  // Step 1: fetch the text and its length.
  if (GET_LENGTH_FOLLOWS(pc)) {
    text = rr->ReadCharStar(&text_len);
  } else {
    text_len = GET_LENGTH(pc);
    text = rr->ReadCharsOnly(text_len);
  }

  // Step 2: report it as a term or as punctuation.
  if (IS_TERM(pc)) {
    CALL4(AddTerm, text, text_len, cur_face_, cur_size_);
  } else {
    CALL4(AddPunctuation, text, text_len, cur_face_, cur_size_);
  }

  // Step 3: report trailing whitespace, if flagged in pc.
  if (GET_WHITESPACE_FOLLOWS(pc)) {
    CALL0(WhitespaceEndedTerm);
  }
}

