a00485_source.html

/*

 * Copyright (c) 2002, Adam Dunkels.

 * All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions

 * are met:

 * 1. Redistributions of source code must retain the above copyright

 *    notice, this list of conditions and the following disclaimer.

 * 2. Redistributions in binary form must reproduce the above

 *    copyright notice, this list of conditions and the following

 *    disclaimer in the documentation and/or other materials provided

 *    with the distribution.

 * 3. The name of the author may not be used to endorse or promote

 *    products derived from this software without specific prior

 *    written permission.

 *

 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS

 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY

 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE

 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,

 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 *

 * This file is part of the Contiki desktop environment

 *

 *

 */


/* htmlparser.c:

 *

 * Implements a very simplistic HTML parser. It recognizes HTML links

 * (<a href>-tags), HTML img alt tags, a few text flow break tags

 * (<br>, <p>, <h>), the <li> tag (but does not even try to

 * distinguish between <ol> or <ul>) as well as HTML comment tags

 * (<!-- -->).

 *

 * To save memory, the HTML parser is state machine driven, which

 * means that it will shave off one character from the HTML page,

 * process that character, and return to the next. Another way of

 * doing it would be to buffer a number of characters and process them

 * together.

 *

 * The main function in this file is the htmlparser_parse() function

 * which takes a part of an HTML file and its length as arguments; the

 * parser state is kept in a file-local htmlparser_state structure.

 * The htmlparser_parse() function will call the helper function

 * parse_word(), which in turn calls parse_tag(). Those functions will

 * in turn call callback functions such as htmlparser_word(),

 * htmlparser_newline() and htmlparser_link(). Those functions must be

 * implemented by the using module (e.g., a web browser program).

 *

 * htmlparser_word() will be called for every word of non-tag text

 * that is neither part of a link nor inside a discarded section.

 *

 * htmlparser_link() will be called whenever a complete link

 * (<a href=...> ... </a>) has been found.

 *

 */


#include <string.h>


#include "contiki.h"

#include "html-strings.h"

#include "www.h"


#include "htmlparser.h"


/* Debug output switch. PRINTF() takes ONE parenthesized argument list,

   e.g. PRINTF(("value %d\n", v)); so that it can be compiled away

   completely. With "#if 1" the macro expands to nothing; change the

   condition to 0 to forward the argument list to printf(). */

#if 1

#define PRINTF(x)

#else

#include <stdio.h>

#define PRINTF(x) printf x

#endif


/*-----------------------------------------------------------------------------------*/

/* Character codes used by the parser, written as numeric values

   instead of character literals (presumably so that the parser does

   not depend on the compiler's execution character set -- TODO

   confirm). */

/* Upper-case letters. */

#define ISO_A     0x41

#define ISO_B     0x42

#define ISO_E     0x45

#define ISO_F     0x46

#define ISO_G     0x47

#define ISO_H     0x48

#define ISO_I     0x49

#define ISO_L     0x4c

#define ISO_M     0x4d

#define ISO_P     0x50

#define ISO_R     0x52

#define ISO_T     0x54


/* Lower-case letters: the upper-case code with bit 0x20 set. */

#define ISO_a     (ISO_A | 0x20)

#define ISO_b     (ISO_B | 0x20)

#define ISO_e     (ISO_E | 0x20)

#define ISO_f     (ISO_F | 0x20)

#define ISO_g     (ISO_G | 0x20)

#define ISO_h     (ISO_H | 0x20)

#define ISO_i     (ISO_I | 0x20)

#define ISO_l     (ISO_L | 0x20)

#define ISO_m     (ISO_M | 0x20)

#define ISO_p     (ISO_P | 0x20)

#define ISO_r     (ISO_R | 0x20)

#define ISO_t     (ISO_T | 0x20)


/* Whitespace and punctuation. ISO_citation is the double quote (")

   and ISO_citation2 the single quote ('); both are accepted as

   attribute value delimiters by parse_word(). */

#define ISO_ht    0x09

#define ISO_nl    0x0a

#define ISO_cr    0x0d

#define ISO_space 0x20

#define ISO_bang  0x21

#define ISO_citation 0x22

#define ISO_ampersand 0x26

#define ISO_citation2 0x27

#define ISO_asterisk 0x2a

#define ISO_dash  0x2d

#define ISO_slash 0x2f

#define ISO_semicolon  0x3b

#define ISO_lt    0x3c

#define ISO_eq    0x3d

#define ISO_gt    0x3e


/* Minor states: where in the character stream the parser currently

   is. parse_word() dispatches on s.minorstate. */

#define MINORSTATE_NONE           0

#define MINORSTATE_TEXT           1 /* Parse normal text */

#define MINORSTATE_EXTCHAR        2 /* Check for semi-colon */

#define MINORSTATE_TAG            3 /* Check for name of tag. */

#define MINORSTATE_TAGEND         4 /* Scan for end of tag. */

#define MINORSTATE_TAGATTR        5 /* Parse tag attr. */

#define MINORSTATE_TAGATTRSPACE   6 /* Parse optional space after tag

                                       attr. */

#define MINORSTATE_TAGATTRPARAM   7 /* Parse tag attr parameter. */

#define MINORSTATE_TAGATTRPARAMNQ 8 /* Parse tag attr parameter without

                                  quotation marks. */

#define MINORSTATE_HTMLCOMMENT    9 /* Scan for HTML comment end */


/* Major states: which part of the document the parser is in. The

   numeric order matters: do_word() throws away text whenever

   s.majorstate >= MAJORSTATE_DISCARD, so DISCARD and SCRIPT must

   remain the highest values. */

#define MAJORSTATE_NONE       0

#define MAJORSTATE_BODY       1

#define MAJORSTATE_LINK       2

#define MAJORSTATE_FORM       3

#define MAJORSTATE_DISCARD    4

#define MAJORSTATE_SCRIPT     5


/* Complete state of the parser. A single file-local instance (s,

   defined right after the structure) is shared by all functions in

   this file. */

struct htmlparser_state {


  /* One of the MINORSTATE_* values. */

  unsigned char minorstate;

  /* Lower-cased name of the tag being parsed and the write index into

     it. tagptr is also reused as the counter of consecutive '-'

     characters while scanning for the end of an HTML comment. */

  char tag[20];

  unsigned char tagptr;

  /* Lower-cased name of the attribute being parsed and its write

     index. An empty attribute name is what parse_tag() interprets as

     "end of tag". */

  char tagattr[20];

  unsigned char tagattrptr;

  /* Value of the attribute being parsed and its write index. */

  char tagattrparam[WWW_CONF_MAX_URLLEN + 1];

  unsigned char tagattrparamptr;

  /* Quote character (" or ') that opened the current attribute

     value. */

  unsigned char quotechar;

  /* Current and previous MAJORSTATE_* value; see

     switch_majorstate(). */

  unsigned char majorstate, lastmajorstate;

  /* Copy of the href value of the link that is currently open. */

  char linkurl[WWW_CONF_MAX_URLLEN + 1];


  /* Text collected by add_char() since the last do_word(), and the

     number of characters in it. */

  char word[WWW_CONF_WEBPAGE_WIDTH];

  unsigned char wordlen;


#if WWW_CONF_FORMS

  /* Action of the current form and the properties collected so far

     for the <input> tag being parsed. */

  char formaction[WWW_CONF_MAX_FORMACTIONLEN + 1];

  unsigned char inputtype;

  char inputname[WWW_CONF_MAX_INPUTNAMELEN + 1];

  char inputvalue[WWW_CONF_MAX_INPUTVALUELEN + 1];

  unsigned char inputvaluesize;

#endif /* WWW_CONF_FORMS */

};


static struct htmlparser_state s;


/*-----------------------------------------------------------------------------------*/

/* Sentinel string stored in the final slot (TAG_LAST) of the tag

   table below. */

static char last[1] = {(char)0xff};


/* Table of all tag names known to the parser, with the TAG_* index of

   each entry defined right next to it. find_tag() narrows a

   [first, last) range of this table one character at a time, which

   only works while the entries are kept in ascending order; TAG_LAST

   doubles as the "no such tag" result of find_tag(). */

static const char *tags[] = {

#define TAG_FIRST       0

#define TAG_SLASHA      0

  html_slasha,

#define TAG_SLASHDIV    1

  html_slashdiv,

#define TAG_SLASHFORM   2

  html_slashform,

#define TAG_SLASHH      3

  html_slashh,

#define TAG_SLASHSCRIPT 4

  html_slashscript,

#define TAG_SLASHSELECT 5

  html_slashselect,

#define TAG_SLASHSTYLE  6

  html_slashstyle,

#define TAG_A           7

  html_a,

#define TAG_BODY        8

  html_body,

#define TAG_BR          9

  html_br,

#define TAG_FORM       10

  html_form,

#define TAG_H1         11

  html_h1,

#define TAG_H2         12

  html_h2,

#define TAG_H3         13

  html_h3,

#define TAG_H4         14

  html_h4,

#define TAG_IMG        15

  html_img,

#define TAG_INPUT      16

  html_input,

#define TAG_LI         17

  html_li,

#define TAG_P          18

  html_p,

#define TAG_SCRIPT     19

  html_script,

#define TAG_SELECT     20

  html_select,

#define TAG_STYLE      21

  html_style,

#define TAG_TR         22

  html_tr,

#define TAG_LAST       23

  last,

};


/*-----------------------------------------------------------------------------------*/

/* Return non-zero if c is one of the whitespace characters the parser
   treats as a separator (space, line feed, carriage return or
   horizontal tab), zero otherwise. */
static unsigned char
iswhitespace(char c)
{
  switch(c) {
  case ISO_space:
  case ISO_nl:
  case ISO_cr:
  case ISO_ht:
    return 1;
  default:
    return 0;
  }
}

/*-----------------------------------------------------------------------------------*/

#if WWW_CONF_FORMS
/* Reset the per-<input> state so that the next input tag starts from
   scratch, and make sure the fixed-size form buffers are terminated
   at their very last byte. */
static void
init_input(void)
{
  /* Guard terminators at the end of each buffer. */
  s.inputvalue[WWW_CONF_MAX_INPUTVALUELEN] = 0;
  s.inputname[WWW_CONF_MAX_INPUTNAMELEN] = 0;
  s.formaction[WWW_CONF_MAX_FORMACTIONLEN] = 0;

  /* No name and no value collected yet. */
  s.inputvalue[0] = 0;
  s.inputname[0] = 0;

  s.inputtype = HTMLPARSER_INPUTTYPE_NONE;
  s.inputvaluesize = 20; /* De facto default size */
}
#endif /* WWW_CONF_FORMS */

/*-----------------------------------------------------------------------------------*/

/* Prepare the parser for a new document: text is discarded until a
   tag changes the major state, parsing starts in plain-text mode, no
   word is pending and (with forms enabled) no form action is set. */
void
htmlparser_init(void)
{
#if WWW_CONF_FORMS
  s.formaction[0] = 0;
#endif /* WWW_CONF_FORMS */
  s.wordlen = 0;
  s.minorstate = MINORSTATE_TEXT;
  s.lastmajorstate = MAJORSTATE_DISCARD;
  s.majorstate = MAJORSTATE_DISCARD;
}

/*-----------------------------------------------------------------------------------*/

/* Convert an upper-case letter (codes 0x41..0x5a, i.e. 'A'..'Z') to
   lower case by setting bit 0x20; every other character is returned
   unchanged.

   The previous brute-force version rewrote every character above 0x40
   and therefore also turned '[', '\', ']', '^' and '_' into '{', '|',
   '}', '~' and DEL. Numeric codes are used instead of character
   literals, like the ISO_* constants in the rest of this file. */
static char
lowercase(char c)
{
  if(c >= 0x41 && c <= 0x5a) {
    return (char)(c | 0x20);
  }
  return c;
}

/*-----------------------------------------------------------------------------------*/

/* Zero-terminate the tag name, attribute name and attribute value
   collected so far, each at its current write index.

   A write index can be equal to the size of its buffer when the
   buffer was filled completely (parse_word() stores a character,
   increments the index and only then notices that the buffer is
   full). Each index is therefore clamped to the last valid position
   so that the terminator never lands outside the array; in that case
   the last stored character is overwritten. The indices themselves
   are left untouched. */
static void
endtagfound(void)
{
  static unsigned int idx;

  idx = s.tagptr;
  if(idx >= sizeof(s.tag)) {
    idx = sizeof(s.tag) - 1;
  }
  s.tag[idx] = 0;

  idx = s.tagattrptr;
  if(idx >= sizeof(s.tagattr)) {
    idx = sizeof(s.tagattr) - 1;
  }
  s.tagattr[idx] = 0;

  idx = s.tagattrparamptr;
  if(idx >= sizeof(s.tagattrparam)) {
    idx = sizeof(s.tagattrparam) - 1;
  }
  s.tagattrparam[idx] = 0;
}

/*-----------------------------------------------------------------------------------*/

/* Enter major state newstate and remember the state that was active
   before in s.lastmajorstate. Requesting the state that is already
   active changes nothing, so s.lastmajorstate is preserved. */
static void
switch_majorstate(unsigned char newstate)
{
  if(s.majorstate == newstate) {
    return;
  }

  PRINTF(("Switching state from %d to %d (%d)\n", s.majorstate, newstate, s.lastmajorstate));
  s.lastmajorstate = s.majorstate;
  s.majorstate = newstate;
}

/*-----------------------------------------------------------------------------------*/

/* Append c to the word being collected. Characters with the high bit
   set are dropped, and so is everything that would not leave room for
   a terminating zero in s.word. */
static void
add_char(unsigned char c)
{
  if(c >= 0x80 || s.wordlen >= WWW_CONF_WEBPAGE_WIDTH - 1) {
    return;
  }

  s.word[s.wordlen] = c;
  s.wordlen++;
}

/*-----------------------------------------------------------------------------------*/

/* Called at a word boundary. What happens to the collected word
   depends on the major state:
    - inside a link the word is kept and a single separating space is
      appended, so that the whole link text is delivered at once;
    - in the discarding states (MAJORSTATE_DISCARD and above) the word
      is thrown away;
    - otherwise it is terminated and passed to htmlparser_word().
   Nothing happens if no characters have been collected. */
static void
do_word(void)
{
  if(s.wordlen == 0) {
    return;
  }

  if(s.majorstate == MAJORSTATE_LINK) {
    if(s.word[s.wordlen - 1] != ISO_space) {
      add_char(ISO_space);
    }
    return;
  }

  if(s.majorstate < MAJORSTATE_DISCARD) {
    s.word[s.wordlen] = '\0';
    htmlparser_word(s.word, s.wordlen);
  }
  s.wordlen = 0;
}

/*-----------------------------------------------------------------------------------*/

/* Finish the pending word (if any) via do_word() and then tell the
   using module to start a new line via htmlparser_newline(). */
static void
newline(void)
{
  do_word();             /* Flush what has been collected so far... */
  htmlparser_newline();  /* ...before breaking the line. */
}

/*-----------------------------------------------------------------------------------*/

/* Look up the zero-terminated, lower-cased tag name in the sorted
   tags[] table.

   Returns the TAG_* index of the matching entry, or TAG_LAST if the
   name is unknown. A name that is followed by a '/' where the table
   entry ends (as in "br/") matches as well.

   The search keeps a half-open range [first, end) of table entries
   that agree with the name on all characters examined so far and
   narrows it by one character per iteration.

   The range bound is tested BEFORE the table is indexed. The previous
   version evaluated tags[tabi][i] first, which for tabi equal to the
   bound read past the end of an entry that is not part of the
   candidate range (for instance past the one-byte sentinel stored at
   TAG_LAST). The upper bound is called "end" so that it no longer
   shadows the file-level sentinel array "last". */
static unsigned char
find_tag(char *tag)
{
  static unsigned char first, end, i, tabi;
  static char tagc;

  first = TAG_FIRST;
  end = TAG_LAST;
  i = 0;

  do {
    tagc = tag[i];

    /* The name ends here (or continues with '/') and so does the
       first candidate: we have a match. */
    if((tagc == 0 || tagc == ISO_slash) && tags[first][i] == 0) {
      return first;
    }

    tabi = first;

    /* First, skip the candidates whose character at this position is
       smaller than the one in the name. */
    while(tabi < end && tagc > (tags[tabi])[i]) {
      ++tabi;
    }
    first = tabi;

    /* Second, find the end of the run of candidates whose character
       at this position is equal to the one in the name. */
    while(tabi < end && tagc == (tags[tabi])[i]) {
      ++tabi;
    }
    end = tabi;

    /* If the range has become empty we have a non-match and leave the
       loop. Else we continue with the next character. */
    ++i;

  } while(end != first);

  return TAG_LAST;
}

/*-----------------------------------------------------------------------------------*/

/* Act on the tag described by s.tag, s.tagattr and s.tagattrparam.

   parse_word() calls this function several times for one tag: once

   for each attribute that has been read, and once more with an empty

   attribute name (s.tagattr[0] == 0) when the end of the tag has been

   reached. Tags that are not in the tags[] table, or that have no

   case below, are ignored. */

static void

parse_tag(void)

{

  static char *tagattrparam;

  static unsigned char tag;

  static unsigned char size;


  tag = find_tag(s.tag);

  /* If we are inside a <script> we mustn't interpret any tags

     (inside JavaScript strings) but wait for the </script>. */

  if(s.majorstate == MAJORSTATE_SCRIPT && tag != TAG_SLASHSCRIPT) {

    return;

  }


  PRINTF(("Parsing tag '%s' '%s' '%s'\n", s.tag, s.tagattr, s.tagattrparam));


  switch(tag) {

  /* Paragraphs and headings produce two line breaks: one here and,

     by falling through, the one shared with the tags below. */

  case TAG_P:

  case TAG_H1:

  case TAG_H2:

  case TAG_H3:

  case TAG_H4:

    newline();

    /* FALLTHROUGH */

  case TAG_BR:

  case TAG_TR:

  case TAG_SLASHDIV:

  case TAG_SLASHH:

    newline();

    break;

  case TAG_LI:

    /* Only act on the end-of-tag call (empty attribute name): start

       a new line and begin the item with "* ". */

    if(s.tagattr[0] == 0) {

      newline();

      add_char(ISO_asterisk);

      add_char(ISO_space);

    }

    break;

  case TAG_SCRIPT:

    switch_majorstate(MAJORSTATE_SCRIPT);

    break;

  case TAG_STYLE:

  case TAG_SELECT:

    switch_majorstate(MAJORSTATE_DISCARD);

    break;

  case TAG_SLASHSCRIPT:

  case TAG_SLASHSTYLE:

  case TAG_SLASHSELECT:

    /* do_word() is called while still in the discarding state, which

       drops whatever text was collected inside the element; then the

       major state from before the element is restored. */

    do_word();

    switch_majorstate(s.lastmajorstate);

    break;

  case TAG_BODY:

    s.majorstate = s.lastmajorstate = MAJORSTATE_BODY;

    break;

  case TAG_IMG:

    /* Render the alt text of an image as "<alt text>". */

    if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 && s.tagattrparam[0] != 0) {

      add_char(ISO_lt);

      tagattrparam = &s.tagattrparam[0];

      while(*tagattrparam) {

        add_char(*tagattrparam);

        ++tagattrparam;

      }

      add_char(ISO_gt);

      do_word();

    }

    break;

  case TAG_A:

    PRINTF(("A %s %s\n", s.tagattr, s.tagattrparam));

    /* A non-empty href opens a link: remember the URL, flush the word

       collected before the link and start collecting the link

       text. */

    if(strncmp(s.tagattr, html_href, sizeof(html_href)) == 0 && s.tagattrparam[0] != 0) {

      strcpy(s.linkurl, s.tagattrparam);

      do_word();

      switch_majorstate(MAJORSTATE_LINK);

    }

    break;

  case TAG_SLASHA:

    /* End of an open link: restore the previous major state and hand

       the collected link text together with its URL to the using

       module. */

    if(s.majorstate == MAJORSTATE_LINK) {

      switch_majorstate(s.lastmajorstate);

      s.word[s.wordlen] = 0;

      htmlparser_link(s.word, s.wordlen, s.linkurl);

      s.wordlen = 0;

    }

    break;

#if WWW_CONF_FORMS

  case TAG_FORM:

    /* First check if we are called at the end of a form tag. If

       so, we should propagate the form action. */

    if(s.tagattr[0] == 0 && s.formaction[0] != 0) {

      htmlparser_form(s.formaction);

      init_input();

    } else {

      PRINTF(("Form tag\n"));

      switch_majorstate(MAJORSTATE_FORM);

      if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {

        PRINTF(("Form action '%s'\n", s.tagattrparam));

        strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);

      }

    }

    break;

  case TAG_SLASHFORM:

    switch_majorstate(MAJORSTATE_BODY);

    s.formaction[0] = 0;

    break;

  case TAG_INPUT:

    /* <input> tags are only handled inside a form. */

    if(s.majorstate == MAJORSTATE_FORM) {

      /* First check if we are called at the end of an input tag. If

         so, we should render the input widget. */

      if(s.tagattr[0] == 0 && s.inputname[0] != 0) {

        PRINTF(("Render input type %d\n", s.inputtype));

        switch(s.inputtype) {

        case HTMLPARSER_INPUTTYPE_NONE:

        case HTMLPARSER_INPUTTYPE_TEXT:

        case HTMLPARSER_INPUTTYPE_HIDDEN:

          htmlparser_inputfield(s.inputtype, s.inputvaluesize, s.inputvalue, s.inputname);

          break;

        case HTMLPARSER_INPUTTYPE_SUBMIT:

        case HTMLPARSER_INPUTTYPE_IMAGE:

          htmlparser_submitbutton(s.inputvalue, s.inputname);

          break;

        }

        init_input();

      } else {

        /* Otherwise record the attribute that has just been read. */

        PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));

        if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {

          if(strncmp(s.tagattrparam, html_submit, sizeof(html_submit)) == 0) {

            s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;

          } else if(strncmp(s.tagattrparam, html_image, sizeof(html_image)) == 0) {

            s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;

          } else if(strncmp(s.tagattrparam, html_text, sizeof(html_text)) == 0) {

            s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;

          } else if(strncmp(s.tagattrparam, html_hidden, sizeof(html_hidden)) == 0) {

            s.inputtype = HTMLPARSER_INPUTTYPE_HIDDEN;

          } else {

            s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;

          }

        } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {

          strncpy(s.inputname, s.tagattrparam, WWW_CONF_MAX_INPUTNAMELEN);

        } else if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 &&

                  s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {

          /* For image buttons the alt text is used as the value. */

          strncpy(s.inputvalue, s.tagattrparam, WWW_CONF_MAX_INPUTVALUELEN);

        } else if(strncmp(s.tagattr, html_value, sizeof(html_value)) == 0) {

          strncpy(s.inputvalue, s.tagattrparam, WWW_CONF_MAX_INPUTVALUELEN);

        } else if(strncmp(s.tagattr, html_size, sizeof(html_size)) == 0) {

          /* Read at most two decimal digits of the size; anything

             that does not start with a digit gives 0. The result is

             limited to WWW_CONF_MAX_INPUTVALUELEN - 1. */

          size = 0;

          if(s.tagattrparam[0] >= '0' &&

             s.tagattrparam[0] <= '9') {

            size = s.tagattrparam[0] - '0';

            if(s.tagattrparam[1] >= '0' &&

               s.tagattrparam[1] <= '9') {

              size = size * 10 + (s.tagattrparam[1] - '0');

            }

          }

          if(size >= WWW_CONF_MAX_INPUTVALUELEN) {

            size = WWW_CONF_MAX_INPUTVALUELEN - 1;

          }

          s.inputvaluesize = size;

        }

      }

    }

    break;

#endif /* WWW_CONF_FORMS */

  }

}

/*-----------------------------------------------------------------------------------*/

/* Feed up to dlen characters from data into the parser state machine.

   The function processes characters in the current minor state and
   stops right after the character that causes a change of minor state
   (or at the end of the data).

   Returns the number of characters that have been consumed. The
   caller is expected to call again with the remaining data. Zero is
   returned when dlen is zero and in one special case, described at
   MINORSTATE_TAG below, in which only the state is changed; the next
   call then always consumes at least one character.

   Changes compared to the previous version:
    - Leaving the tag state inside a <script> no longer returns a
      count derived from the stale static loop index of the preceding
      call; that skipped unprocessed input and could jump right over
      the closing </script>.
    - The "next character must be '/'" check inside a <script> is only
      applied to the first character after the '<'. Before, a tag that
      was split across two calls was aborted at the split.
    - The tag name buffer is considered full one character earlier so
      that the terminating zero always fits.
    - The dash counter used while scanning an HTML comment saturates
      instead of wrapping around to zero. */
static uint16_t
parse_word(char *data, uint8_t dlen)
{
  static uint8_t i;
  static uint8_t len;
  unsigned char c;

  len = dlen;
  if(len == 0) {
    return 0;
  }

  switch(s.minorstate) {
  case MINORSTATE_TEXT:
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(iswhitespace(c)) {
        do_word();
      } else if(c == ISO_lt) {
        s.minorstate = MINORSTATE_TAG;
        s.tagptr = 0;
        break;
      } else if(c == ISO_ampersand) {
        s.minorstate = MINORSTATE_EXTCHAR;
        break;
      } else {
        add_char(c);
      }
    }
    break;
  case MINORSTATE_EXTCHAR:
    /* Character entities are not decoded: a complete entity (ending
       with a semi-colon) is replaced by a space, while an ampersand
       that is followed by whitespace is kept as it is. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_semicolon) {
        s.minorstate = MINORSTATE_TEXT;
        add_char(ISO_space);
        break;
      } else if(iswhitespace(c)) {
        s.minorstate = MINORSTATE_TEXT;
        add_char(ISO_ampersand);
        add_char(ISO_space);
        break;
      }
    }
    break;
  case MINORSTATE_TAG:
    /* If we are inside a <script> we mustn't mistake a JavaScript
       equation with a '<' as a tag. So we check for the very next
       character (no tag name character stored yet) to be a '/' as
       we're only interested in parsing the </script>. If it is
       something else we go back to text mode without consuming
       anything, so that the character is looked at again as text. */
    if(s.majorstate == MAJORSTATE_SCRIPT && s.tagptr == 0 &&
       data[0] != ISO_slash) {
      s.minorstate = MINORSTATE_TEXT;
      return 0;
    }

    /* We are currently parsing within the name of a tag. We check
       for the end of a tag (the '>' character) or whitespace (which
       indicates that we should parse a tag attr argument
       instead). */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_gt) {
        /* Full tag found. We continue parsing regular text. */
        s.minorstate = MINORSTATE_TEXT;
        s.tagattrptr = s.tagattrparamptr = 0;
        endtagfound();
        parse_tag();
        break;
      } else if(iswhitespace(c)) {
        /* The name of the tag found. We continue parsing the tag
           attr.*/
        s.minorstate = MINORSTATE_TAGATTR;
        s.tagattrptr = 0;
        endtagfound();
        break;
      } else {
        /* Keep track of the name of the tag, but convert it to
           lower case. */
        s.tag[s.tagptr] = lowercase(c);
        ++s.tagptr;
        /* Check if the ->tag field is full (one byte is reserved for
           the terminating zero). If so, we just eat up any data left
           in the tag. */
        if(s.tagptr >= sizeof(s.tag) - 1) {
          s.minorstate = MINORSTATE_TAGEND;
          break;
        }
      }

      /* Check for HTML comment, indicated by <!-- */
      if(s.tagptr == 3 &&
         s.tag[0] == ISO_bang &&
         s.tag[1] == ISO_dash &&
         s.tag[2] == ISO_dash) {
        PRINTF(("Starting comment...\n"));
        s.minorstate = MINORSTATE_HTMLCOMMENT;
        s.tagptr = 0;
        endtagfound();
        break;
      }
    }
    break;
  case MINORSTATE_TAGATTR:
    /* We parse the "tag attr", i.e., the "href" in <a
       href="...">. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_gt) {
        /* Full tag found. */
        s.minorstate = MINORSTATE_TEXT;
        s.tagattrparamptr = 0;
        s.tagattrptr = 0;
        endtagfound();
        parse_tag();
        s.tagptr = 0;
        endtagfound();
        break;
      } else if(iswhitespace(c)) {
        if(s.tagattrptr == 0) {
          /* Discard leading spaces. */
        } else {
          /* A non-leading space is the end of the attribute. */
          s.tagattrparamptr = 0;
          endtagfound();
          parse_tag();
          s.minorstate = MINORSTATE_TAGATTRSPACE;
          break;
        }
      } else if(c == ISO_eq) {
        s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
        s.tagattrparamptr = 0;
        endtagfound();
        break;
      } else {
        s.tagattr[s.tagattrptr] = lowercase(c);
        ++s.tagattrptr;
        /* Check if the "tagattr" field is full. If so, we just eat
           up any data left in the tag. (The index is reset to zero
           in MINORSTATE_TAGEND before the field is terminated.) */
        if(s.tagattrptr == sizeof(s.tagattr)) {
          s.minorstate = MINORSTATE_TAGEND;
          break;
        }
      }
    }
    break;
  case MINORSTATE_TAGATTRSPACE:
    /* We are between an attribute name and whatever follows it:
       either a '=' (the attribute has a value) or the first
       character of the next attribute. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(iswhitespace(c)) {
        /* Discard spaces. */
      } else if(c == ISO_eq) {
        s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
        s.tagattrparamptr = 0;
        endtagfound();
        parse_tag();
        break;
      } else {
        s.tagattr[0] = lowercase(c);
        s.tagattrptr = 1;
        s.minorstate = MINORSTATE_TAGATTR;
        break;
      }
    }
    break;
  case MINORSTATE_TAGATTRPARAMNQ:
    /* We are parsing the "tag attr parameter", i.e., the link part
       in <a href="link">, before we know whether it is quoted. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_gt) {
        /* Full tag found: first report the attribute, then report
           the end of the tag with an empty attribute name. */
        endtagfound();
        parse_tag();
        s.minorstate = MINORSTATE_TEXT;
        s.tagattrptr = 0;
        endtagfound();
        parse_tag();
        s.tagptr = 0;
        endtagfound();
        break;
      } else if(iswhitespace(c) && s.tagattrparamptr == 0) {
        /* Discard leading spaces. */
      } else if((c == ISO_citation ||
                 c == ISO_citation2) && s.tagattrparamptr == 0) {
        s.minorstate = MINORSTATE_TAGATTRPARAM;
        s.quotechar = c;
        PRINTF(("tag attr param q found\n"));
        break;
      } else if(iswhitespace(c)) {
        PRINTF(("Non-leading space found at %d\n", s.tagattrparamptr));
        /* Stop parsing if a non-leading space was found */
        endtagfound();
        parse_tag();

        s.minorstate = MINORSTATE_TAGATTR;
        s.tagattrptr = 0;
        endtagfound();
        break;
      } else {
        s.tagattrparam[s.tagattrparamptr] = c;
        ++s.tagattrparamptr;
        /* Check if the "tagattrparam" field is full. If so, we just
           eat up any data left in the tag. */
        if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
          s.minorstate = MINORSTATE_TAGEND;
          break;
        }
      }
    }
    break;
  case MINORSTATE_TAGATTRPARAM:
    /* We are parsing the quoted "tag attr parameter", i.e., the link
       part in <a href="link">. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == s.quotechar) {
        /* Found end of tag attr parameter. */
        endtagfound();
        parse_tag();

        s.minorstate = MINORSTATE_TAGATTR;
        s.tagattrptr = 0;
        endtagfound();
        break;
      } else {
        /* Any kind of whitespace is stored as a plain space. */
        if(iswhitespace(c)) {
          s.tagattrparam[s.tagattrparamptr] = ISO_space;
        } else {
          s.tagattrparam[s.tagattrparamptr] = c;
        }

        ++s.tagattrparamptr;
        /* Check if the "tagattrparam" field is full. If so, we just
           eat up any data left in the tag. */
        if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
          s.minorstate = MINORSTATE_TAGEND;
          break;
        }
      }
    }
    break;
  case MINORSTATE_HTMLCOMMENT:
    /* s.tagptr counts the dashes seen directly before the current
       character; a '>' that follows at least one dash ends the
       comment. */
    for(i = 0; i < len; ++i) {
      c = data[i];
      if(c == ISO_dash) {
        /* Saturate so that a long run of dashes cannot wrap the
           counter back to zero. */
        if(s.tagptr < 0xff) {
          ++s.tagptr;
        }
      } else if(c == ISO_gt && s.tagptr > 0) {
        PRINTF(("Comment done.\n"));
        s.minorstate = MINORSTATE_TEXT;
        break;
      } else {
        s.tagptr = 0;
      }
    }
    break;
  case MINORSTATE_TAGEND:
    /* Discard characters until a '>' is seen. */
    for(i = 0; i < len; ++i) {
      if(data[i] == ISO_gt) {
        s.minorstate = MINORSTATE_TEXT;
        s.tagattrptr = 0;
        endtagfound();
        parse_tag();
        break;
      }
    }
    break;
  default:
    /* Unknown state: consume a single character. */
    i = 0;
    break;
  }

  /* i is the index of the character that ended the loop; if the loop
     ran to completion everything has been consumed. */
  if(i >= len) {
    return len;
  }
  return i + 1;
}

/*-----------------------------------------------------------------------------------*/

/* Parse datalen characters of HTML starting at data.

   The data is handed to parse_word() in pieces of at most 255
   characters (its length argument is an 8-bit value); after each call
   the number of characters it reports as consumed is skipped, until
   nothing is left. */
void
htmlparser_parse(char *data, uint16_t datalen)
{
  uint16_t consumed;
  uint8_t chunk;

  while(datalen != 0) {
    chunk = (datalen > 255) ? 255 : (uint8_t)datalen;
    consumed = parse_word(data, chunk);
    data += consumed;
    datalen -= consumed;
  }
}

/*-----------------------------------------------------------------------------------*/