#
# Splits an HTML file into its plain HTML and its comment declarations.
#
%{

import java.io.*;
import java.net.*;

public class htmlsplit
{
                                /* Holds all text outside comment declarations */
  StringBuffer htmlContents = new StringBuffer();
                                /* Holds the matched comment declarations */
  StringBuffer htmlComments = new StringBuffer();

  /**
   * Command-line entry point: lexes one HTML document and prints the
   * accumulated HTML followed by the accumulated comments.
   *
   * @param argv exactly one argument: an http:// or https:// URL, or the
   *             path of a local file
   * @throws Exception if the input cannot be opened, read or closed, or if
   *                   the lexer itself fails
   */
  public static void main(String argv[]) throws Exception
  {
    htmlsplit myLexer = new htmlsplit();

    if (argv.length != 1)
      {
        System.err.println("Usage: java htmlsplit (URL | file)");
        System.exit(1);
      }

    String source = argv[0];
    InputStream inp;
    // Treat both plain and TLS web addresses as URLs; anything else is
    // taken to be a local file name.
    if (source.startsWith("http://") || source.startsWith("https://"))
      { inp = new URL(source).openStream(); }
    else
      { inp = new FileInputStream(source); }

    try
      {
        myLexer.init(inp);
        // NOTE(review): a single call is presumably enough to consume the
        // whole input because no rule action returns a token -- confirm
        // against the Jax documentation.
        myLexer.jax_next_token();
      }
    finally
      {
        // Release the file handle / network connection even if lexing fails.
        inp.close();
      }

    System.out.println("Html\n====\n\n" + myLexer.htmlContents);
    System.out.println("Comments\n========\n\n"+myLexer.htmlComments);
  }

%}

# From the spec:
# A comment declaration consists of `<!'
# followed by zero or more comments followed by `>'. Each
# comment starts with `--' and includes all text up to
# and including the next occurrence of `--'.
# In a comment declaration, white space is allowed after each comment,
# but not before the first comment.


# Comment declaration rule: `<!', then zero or more `-- ... --' comments,
# then `>'.  Because the comment group may repeat zero times, the empty
# declaration `<!>' is matched as well.  A comment body is any run of
# characters other than `-', or a `-' followed by a character other than `-'.
# The class after each comment allows trailing white space
# (`\_' is presumably Jax's escape for a space -- confirm).
# The action appends jax_text() -- presumably the whole matched text,
# delimiters included -- to htmlComments.
/ <!
    ( -- ( [^\-] | -[^\-] )* -- [\_\r\n\t]* )*
  > /
  %{ htmlComments.append(jax_text()); %} ;


# Plain HTML: consume a whole run of characters other than `<' in one match
# (rather than one character at a time) and append it to htmlContents.
/[^<]+/
  %{ htmlContents.append(jax_text()); %} ;

# A single `<', which the `[^<]+' rule can never consume, is copied to the
# HTML output as-is.  NOTE(review): presumably only chosen when the longer
# comment-declaration rule does not match at this position -- confirm Jax's
# longest-match behaviour.
/</
  %{ htmlContents.append('<'); %} ;

# Trailing section
%{

}

%}