Clover coverage report - Code Coverage for tapestry release 4.0-beta-6
Coverage timestamp: Wed Sep 7 2005 18:41:34 EDT
file stats: LOC: 1,457   Methods: 33
NCLOC: 755   Classes: 2
 
 Source file Conditionals Statements Methods TOTAL
TemplateParser.java 94.1% 96.7% 93.9% 95.8%
coverage coverage
 1    // Copyright 2004, 2005 The Apache Software Foundation
 2    //
 3    // Licensed under the Apache License, Version 2.0 (the "License");
 4    // you may not use this file except in compliance with the License.
 5    // You may obtain a copy of the License at
 6    //
 7    // http://www.apache.org/licenses/LICENSE-2.0
 8    //
 9    // Unless required by applicable law or agreed to in writing, software
 10    // distributed under the License is distributed on an "AS IS" BASIS,
 11    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12    // See the License for the specific language governing permissions and
 13    // limitations under the License.
 14   
 15    package org.apache.tapestry.parse;
 16   
 17    import java.util.ArrayList;
 18    import java.util.Collections;
 19    import java.util.HashMap;
 20    import java.util.Iterator;
 21    import java.util.List;
 22    import java.util.Map;
 23   
 24    import org.apache.hivemind.ApplicationRuntimeException;
 25    import org.apache.hivemind.Location;
 26    import org.apache.hivemind.Resource;
 27    import org.apache.hivemind.impl.LocationImpl;
 28    import org.apache.oro.text.regex.MalformedPatternException;
 29    import org.apache.oro.text.regex.MatchResult;
 30    import org.apache.oro.text.regex.Pattern;
 31    import org.apache.oro.text.regex.PatternMatcher;
 32    import org.apache.oro.text.regex.Perl5Compiler;
 33    import org.apache.oro.text.regex.Perl5Matcher;
 34    import org.apache.tapestry.util.IdAllocator;
 35   
 36    /**
 37    * Parses Tapestry templates, breaking them into a series of
 38    * {@link org.apache.tapestry.parse.TemplateToken tokens}. Although often referred to as an "HTML
 39    * template", there is no real requirement that the template be HTML. This parser can handle any
 40    * reasonable SGML derived markup (including XML), but specifically works around the ambiguities of
 41    * HTML reasonably.
 42    * <p>
 43    * Deployed as the tapestry.parse.TemplateParser service, using the threaded model.
 44    * <p>
 45    * Dynamic markup in Tapestry attempts to be invisible. Components are arbitrary tags containing a
 46    * <code>jwcid</code> attribute. Such components must be well balanced (have a matching close tag,
 47    * or end the tag with "<code>/&gt;</code>").
 48    * <p>
 49    * Generally, the id specified in the template is matched against a component defined in the
 50    * specification. However, implicit components are also possible. The jwcid attribute uses the
 51    * syntax "<code>@Type</code>" for implicit components. Type is the component type, and may include a library id
 52    * prefix. Such a component is anonymous (but is given a unique id).
 53    * <p>
 54    * (The unique ids assigned start with a dollar sign, which is normally not allowed for
 55    * component ids ... this helps to make them stand out and assures that they do not conflict
 56    * with user-defined component ids. These ids tend to propagate into URLs and become HTML
 57    * element names and even JavaScript variable names ... the dollar sign is acceptable in these
 58    * contexts as well).
 59    * <p>
 60    * Implicit component may also be given a name using the syntax "
 61    * <code>componentId:@Type</code>". Such a component should <b>not </b> be defined in the
 62    * specification, but may still be accessed via
 63    * {@link org.apache.tapestry.IComponent#getComponent(String)}.
 64    * <p>
 65    * Both defined and implicit components may have additional attributes defined, simply by
 66    * including them in the template. They set formal or informal parameters of the component to
 67    * static strings.
 68    * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowInformalParameters()}, if
 69    * false, will cause such attributes to be simply ignored. For defined components, conflicting
 70    * values defined in the template are ignored.
 71    * <p>
 72    * Attributes in component tags will become formal and informal parameters of the
 73    * corresponding component. Most attributes will be
 74    * <p>
 75    * The parser removes the body of some tags (when the corresponding component doesn't
 76    * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowBody() allow a body}), and
 77    * allows portions of the template to be completely removed.
 78    * <p>
 79    * The parser does a pretty thorough lexical analysis of the template, and reports a great
 80    * number of errors, including improper nesting of tags.
 81    * <p>
 82    * The parser supports <em>invisible localization</em>: The parser recognizes HTML of the
 83    * form: <code>&lt;span key="<i>value</i>"&gt; ... &lt;/span&gt;</code> and converts them
 84    * into a {@link TokenType#LOCALIZATION} token. You may also specify a <code>raw</code>
 85    * attribute ... if the value is <code>true</code>, then the localized value is sent to the
 86    * client without filtering, which is appropriate if the value has any markup that should not
 87    * be escaped.
 88    * @author Howard Lewis Ship, Geoff Longman
 89    */
 90   
 91    public class TemplateParser implements ITemplateParser
 92    {
 93    /**
 94    * A "magic" component id that causes the tag with the id and its entire body to be ignored
 95    * during parsing.
 96    */
 97   
 98    private static final String REMOVE_ID = "$remove$";
 99   
 100    /**
 101    * A "magic" component id that causes the tag to represent the true content of the template. Any
 102    * content prior to the tag is discarded, and any content after the tag is ignored. The tag
 103    * itself is not included.
 104    */
 105   
 106    private static final String CONTENT_ID = "$content$";
 107   
 108    /**
 109    * The attribute, checked for in &lt;span&gt; tags, that signifies that the span is being used as
 110    * an invisible localization.
 111    *
 112    * @since 2.0.4
 113    */
 114   
 115    public static final String LOCALIZATION_KEY_ATTRIBUTE_NAME = "key";
 116   
 117    /**
 118    * Used with {@link #LOCALIZATION_KEY_ATTRIBUTE_NAME}to indicate a string that should be
 119    * rendered "raw" (without escaping HTML). If not specified, defaults to "false". The value must
 120    * equal "true" (caselessly).
 121    *
 122    * @since 2.3
 123    */
 124   
 125    public static final String RAW_ATTRIBUTE_NAME = "raw";
 126   
 127    /**
 128    * Attribute name used to identify components.
 129    *
 130    * @since 4.0
 131    */
 132   
 133    private String _componentAttributeName;
 134   
 135    private static final String PROPERTY_NAME_PATTERN = "_?[a-zA-Z]\\w*";
 136   
 137    /**
 138    * Pattern used to recognize ordinary components (defined in the specification).
 139    *
 140    * @since 3.0
 141    */
 142   
 143    public static final String SIMPLE_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")$";
 144   
 145    /**
 146    * Pattern used to recognize implicit components (whose type is defined in the template).
 147    * Subgroup 1 is the id (which may be null) and subgroup 2 is the type (which may be qualified
 148    * with a library prefix). Subgroup 4 is the library id, Subgroup 5 is the simple component
 149    * type.
 150    *
 151    * @since 3.0
 152    */
 153   
 154    public static final String IMPLICIT_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")?@((("
 155    + PROPERTY_NAME_PATTERN + "):)?(" + PROPERTY_NAME_PATTERN + "))$";
 156   
 157    private static final int IMPLICIT_ID_PATTERN_ID_GROUP = 1;
 158   
 159    private static final int IMPLICIT_ID_PATTERN_TYPE_GROUP = 2;
 160   
 161    private static final int IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP = 4;
 162   
 163    private static final int IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP = 5;
 164   
 165    private Pattern _simpleIdPattern;
 166   
 167    private Pattern _implicitIdPattern;
 168   
 169    private PatternMatcher _patternMatcher;
 170   
 171    private IdAllocator _idAllocator = new IdAllocator();
 172   
 173    private ITemplateParserDelegate _delegate;
 174   
 175    /**
 176    * Identifies the template being parsed; used with error messages.
 177    */
 178   
 179    private Resource _resourceLocation;
 180   
 181    /**
 182    * Shared instance of {@link Location}used by all {@link TextToken}instances in the template.
 183    */
 184   
 185    private Location _templateLocation;
 186   
 187    /**
 188    * Location within the resource for the current line.
 189    */
 190   
 191    private Location _currentLocation;
 192   
 193    /**
 194    * Local reference to the template data that is to be parsed.
 195    */
 196   
 197    private char[] _templateData;
 198   
 199    /**
 200    * List of Tag
 201    */
 202   
 203    private List _stack = new ArrayList();
 204   
 205    private static class Tag
 206    {
 207    // The element, i.e., <jwc> or virtually any other element (via jwcid attribute)
 208    String _tagName;
 209   
 210    // If true, the tag is a placeholder for a dynamic element
 211    boolean _component;
 212   
 213    // If true, the body of the tag is being ignored, and the
 214    // ignore flag is cleared when the close tag is reached
 215    boolean _ignoringBody;
 216   
 217    // If true, then the entire tag (and its body) is being ignored
 218    boolean _removeTag;
 219   
 220    // If true, then the tag must have a balanced closing tag.
 221    // This is always true for components.
 222    boolean _mustBalance;
 223   
 224    // The line on which the start tag exists
 225    int _line;
 226   
 227    // If true, then the parse ends when the closing tag is found.
 228    boolean _content;
 229   
 230  1391 Tag(String tagName, int line)
 231    {
 232  1391 _tagName = tagName;
 233  1391 _line = line;
 234    }
 235   
 236  1309 boolean match(String matchTagName)
 237    {
 238  1309 return _tagName.equalsIgnoreCase(matchTagName);
 239    }
 240    }
 241   
 242    /**
 243    * List of {@link TemplateToken}, this forms the ultimate response.
 244    */
 245   
 246    private List _tokens = new ArrayList();
 247   
 248    /**
 249    * The location of the 'cursor' within the template data. The advance() method moves this
 250    * forward.
 251    */
 252   
 253    private int _cursor;
 254   
 255    /**
 256    * The start of the current block of static text, or -1 if no block is active.
 257    */
 258   
 259    private int _blockStart;
 260   
 261    /**
 262    * The current line number; tracked by advance(). Starts at 1.
 263    */
 264   
 265    private int _line;
 266   
 267    /**
 268    * Set to true when the body of a tag is being ignored. This is typically used to skip over the
 269    * body of a tag when its corresponding component doesn't allow a body, or when the special jwcid
 270    * of $remove$ is used.
 271    */
 272   
 273    private boolean _ignoring;
 274   
 275    /**
 276    * A {@link Map}of {@link String}s, used to store attributes collected while parsing a tag.
 277    */
 278   
 279    private Map _attributes = new HashMap();
 280   
 281    /**
 282    * A factory used to create template tokens.
 283    */
 284   
 285    private TemplateTokenFactory _factory;
 286   
 287  127 public TemplateParser()
 288    {
 289  127 Perl5Compiler compiler = new Perl5Compiler();
 290   
 291  127 try
 292    {
 293  127 _simpleIdPattern = compiler.compile(SIMPLE_ID_PATTERN);
 294  127 _implicitIdPattern = compiler.compile(IMPLICIT_ID_PATTERN);
 295    }
 296    catch (MalformedPatternException ex)
 297    {
 298  0 throw new ApplicationRuntimeException(ex);
 299    }
 300   
 301  127 _patternMatcher = new Perl5Matcher();
 302    }
 303   
 304    /**
 305    * Parses the template data into an array of {@link TemplateToken}s.
 306    * <p>
 307    * The parser is <i>decidedly </i> not threadsafe, so care should be taken that only a single
 308    * thread accesses it.
 309    *
 310    * @param templateData
 311    * the HTML template to parse. Some tokens will hold a reference to this array.
 312    * @param delegate
 313    * object that "knows" about defined components
 314    * @param resourceLocation
 315    * a description of where the template originated from, used with error messages.
 316    */
 317   
 318  191 public TemplateToken[] parse(char[] templateData, ITemplateParserDelegate delegate,
 319    Resource resourceLocation) throws TemplateParseException
 320    {
 321  191 try
 322    {
 323  191 beforeParse(templateData, delegate, resourceLocation);
 324   
 325  191 parse();
 326   
 327  177 return (TemplateToken[]) _tokens.toArray(new TemplateToken[_tokens.size()]);
 328    }
 329    finally
 330    {
 331  191 afterParse();
 332    }
 333    }
 334   
 335    /**
 336    * perform default initialization of the parser.
 337    */
 338   
 339  191 protected void beforeParse(char[] templateData, ITemplateParserDelegate delegate,
 340    Resource resourceLocation)
 341    {
 342  191 _templateData = templateData;
 343  191 _resourceLocation = resourceLocation;
 344  191 _templateLocation = new LocationImpl(resourceLocation);
 345  191 _delegate = delegate;
 346  191 _ignoring = false;
 347  191 _line = 1;
 348  191 _componentAttributeName = delegate.getComponentAttributeName();
 349    }
 350   
 351    /**
 352    * Perform default cleanup after parsing completes.
 353    */
 354   
 355  191 protected void afterParse()
 356    {
 357  191 _delegate = null;
 358  191 _templateData = null;
 359  191 _resourceLocation = null;
 360  191 _templateLocation = null;
 361  191 _currentLocation = null;
 362  191 _stack.clear();
 363  191 _tokens.clear();
 364  191 _attributes.clear();
 365  191 _idAllocator.clear();
 366    }
 367   
 368    /**
 369    * Used by the parser to report problems in the parse. Parsing <b>must </b> stop when a problem
 370    * is reported.
 371    * <p>
 372    * The default implementation simply throws an exception that contains the message and location
 373    * parameters.
 374    * <p>
 375    * Subclasses may override but <b>must </b> ensure they throw the required exception.
 376    *
 377    * @param message
 378    * @param location
 379    * @param line
 380    * ignored by the default impl
 381    * @param cursor
 382    * ignored by the default impl
 383    * @throws TemplateParseException
 384    * always thrown in order to terminate the parse.
 385    */
 386   
 387  14 protected void templateParseProblem(String message, Location location, int line, int cursor)
 388    throws TemplateParseException
 389    {
 390  14 throw new TemplateParseException(message, location);
 391    }
 392   
 393    /**
 394    * Used by the parser to report tapestry runtime specific problems in the parse. Parsing <b>must
 395    * </b> stop when a problem is reported.
 396    * <p>
 397    * The default implementation simply rethrows the exception.
 398    * <p>
 399    * Subclasses may override but <b>must </b> ensure they rethrow the exception.
 400    *
 401    * @param exception
 402    * @param line
 403    * ignored by the default impl
 404    * @param cursor
 405    * ignored by the default impl
 406    * @throws ApplicationRuntimeException
 407    * always rethrown in order to terminate the parse.
 408    */
 409   
 410  0 protected void templateParseProblem(ApplicationRuntimeException exception, int line, int cursor)
 411    throws ApplicationRuntimeException
 412    {
 413  0 throw exception;
 414    }
 415   
 416    /**
 417    * Give subclasses access to the parse results.
 418    */
 419  0 protected List getTokens()
 420    {
 421  0 if (_tokens == null)
 422  0 return Collections.EMPTY_LIST;
 423   
 424  0 return _tokens;
 425    }
 426   
 427    /**
 428    * Checks to see if the next few characters match a given pattern.
 429    */
 430   
 431  16105 private boolean lookahead(char[] match)
 432    {
 433  16105 try
 434    {
 435  16105 for (int i = 0; i < match.length; i++)
 436    {
 437  21550 if (_templateData[_cursor + i] != match[i])
 438  14716 return false;
 439    }
 440   
 441    // Every character matched.
 442   
 443  1389 return true;
 444    }
 445    catch (IndexOutOfBoundsException ex)
 446    {
 447  0 return false;
 448    }
 449    }
 450   
 451    private static final char[] COMMENT_START = new char[]
 452    { '<', '!', '-', '-' };
 453   
 454    private static final char[] COMMENT_END = new char[]
 455    { '-', '-', '>' };
 456   
 457    private static final char[] CLOSE_TAG = new char[]
 458    { '<', '/' };
 459   
 460  191 protected void parse() throws TemplateParseException
 461    {
 462  191 _cursor = 0;
 463  191 _blockStart = -1;
 464  191 int length = _templateData.length;
 465   
 466  191 while (_cursor < length)
 467    {
 468  22005 if (_templateData[_cursor] != '<')
 469    {
 470  18953 if (_blockStart < 0 && !_ignoring)
 471  1165 _blockStart = _cursor;
 472   
 473  18953 advance();
 474  18953 continue;
 475    }
 476   
 477    // OK, start of something.
 478   
 479  3052 if (lookahead(CLOSE_TAG))
 480    {
 481  1238 closeTag();
 482  1235 continue;
 483    }
 484   
 485  1814 if (lookahead(COMMENT_START))
 486    {
 487  76 skipComment();
 488  75 continue;
 489    }
 490   
 491    // The start of some tag.
 492   
 493  1738 startTag();
 494    }
 495   
 496    // Usually there's some text at the end of the template (after the last closing tag) that
 497    // should
 498    // be added. Often the last few tags are static tags so we definately
 499    // need to end the text block.
 500   
 501  177 addTextToken(_templateData.length - 1);
 502    }
 503   
 504    /**
 505    * Advance forward in the document until the end of the comment is reached. In addition, skip
 506    * any whitespace following the comment.
 507    */
 508   
 509  76 private void skipComment() throws TemplateParseException
 510    {
 511  76 int length = _templateData.length;
 512  76 int startLine = _line;
 513   
 514  76 if (_blockStart < 0 && !_ignoring)
 515  19 _blockStart = _cursor;
 516   
 517  76 while (true)
 518    {
 519  11240 if (_cursor >= length)
 520  1 templateParseProblem(ParseMessages.commentNotEnded(startLine), new LocationImpl(
 521    _resourceLocation, startLine), startLine, _cursor);
 522   
 523  11239 if (lookahead(COMMENT_END))
 524  75 break;
 525   
 526    // Not the end of the comment, advance over it.
 527   
 528  11164 advance();
 529    }
 530   
 531  75 _cursor += COMMENT_END.length;
 532  75 advanceOverWhitespace();
 533    }
 534   
 535  1694 private void addTextToken(int end)
 536    {
 537    // No active block to add to.
 538   
 539  1694 if (_blockStart < 0)
 540  419 return;
 541   
 542  1275 if (_blockStart <= end)
 543    {
 544    // This seems odd, shouldn't the location be the current location? I guess
 545    // no errors are ever reported for a text token.
 546   
 547  1275 TemplateToken token = _factory.createTextToken(
 548    _templateData,
 549    _blockStart,
 550    end,
 551    _templateLocation);
 552   
 553  1275 _tokens.add(token);
 554    }
 555   
 556  1275 _blockStart = -1;
 557    }
 558   
 559    private static final int WAIT_FOR_ATTRIBUTE_NAME = 0;
 560   
 561    private static final int COLLECT_ATTRIBUTE_NAME = 1;
 562   
 563    private static final int ADVANCE_PAST_EQUALS = 2;
 564   
 565    private static final int WAIT_FOR_ATTRIBUTE_VALUE = 3;
 566   
 567    private static final int COLLECT_QUOTED_VALUE = 4;
 568   
 569    private static final int COLLECT_UNQUOTED_VALUE = 5;
 570   
 571  1738 private void startTag() throws TemplateParseException
 572    {
 573  1738 int cursorStart = _cursor;
 574  1738 int length = _templateData.length;
 575  1738 String tagName = null;
 576  1738 boolean endOfTag = false;
 577  1738 boolean emptyTag = false;
 578  1738 int startLine = _line;
 579  1738 Location startLocation = new LocationImpl(_resourceLocation, startLine);
 580   
 581  1738 tagBeginEvent(startLine, _cursor);
 582   
 583  1738 advance();
 584   
 585    // Collect the element type
 586   
 587  7415 while (_cursor < length)
 588    {
 589  7415 char ch = _templateData[_cursor];
 590   
 591  7415 if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
 592    {
 593  1738 tagName = new String(_templateData, cursorStart + 1, _cursor - cursorStart - 1);
 594   
 595  1738 break;
 596    }
 597   
 598  5677 advance();
 599    }
 600   
 601  1738 String attributeName = null;
 602  1738 int attributeNameStart = -1;
 603  1738 int attributeValueStart = -1;
 604  1738 int state = WAIT_FOR_ATTRIBUTE_NAME;
 605  1738 char quoteChar = 0;
 606   
 607  1738 _attributes.clear();
 608   
 609    // Collect each attribute
 610   
 611  1738 while (!endOfTag)
 612    {
 613  45489 if (_cursor >= length)
 614    {
 615  1 String message = (tagName == null) ? ParseMessages.unclosedUnknownTag(startLine)
 616    : ParseMessages.unclosedTag(tagName, startLine);
 617   
 618  1 templateParseProblem(message, startLocation, startLine, cursorStart);
 619    }
 620   
 621  45488 char ch = _templateData[_cursor];
 622   
 623  45488 switch (state)
 624    {
 625  6521 case WAIT_FOR_ATTRIBUTE_NAME:
 626   
 627    // Ignore whitespace before the next attribute name, while
 628    // looking for the end of the current tag.
 629   
 630  6521 if (ch == '/')
 631    {
 632  502 emptyTag = true;
 633  502 advance();
 634  502 break;
 635    }
 636   
 637  6019 if (ch == '>')
 638    {
 639  1734 endOfTag = true;
 640  1734 break;
 641    }
 642   
 643  4285 if (Character.isWhitespace(ch))
 644    {
 645  2087 advance();
 646  2087 break;
 647    }
 648   
 649    // Found non-whitespace, assume its the attribute name.
 650    // Note: could use a check here for non-alpha.
 651   
 652  2198 attributeNameStart = _cursor;
 653  2198 state = COLLECT_ATTRIBUTE_NAME;
 654  2198 advance();
 655  2198 break;
 656   
 657  11740 case COLLECT_ATTRIBUTE_NAME:
 658   
 659    // Looking for end of attribute name.
 660   
 661  11740 if (ch == '=' || ch == '/' || ch == '>' || Character.isWhitespace(ch))
 662    {
 663  2198 attributeName = new String(_templateData, attributeNameStart, _cursor
 664    - attributeNameStart);
 665   
 666  2198 state = ADVANCE_PAST_EQUALS;
 667  2198 break;
 668    }
 669   
 670    // Part of the attribute name
 671   
 672  9542 advance();
 673  9542 break;
 674   
 675  2341 case ADVANCE_PAST_EQUALS:
 676   
 677    // Looking for the '=' sign. May hit the end of the tag, or (for bare
 678    // attributes),
 679    // the next attribute name.
 680   
 681  2341 if (ch == '/' || ch == '>')
 682    {
 683    // A bare attribute, which is not interesting to
 684    // us.
 685   
 686  145 state = WAIT_FOR_ATTRIBUTE_NAME;
 687  145 break;
 688    }
 689   
 690  2196 if (Character.isWhitespace(ch))
 691    {
 692  143 advance();
 693  143 break;
 694    }
 695   
 696  2053 if (ch == '=')
 697    {
 698  1963 state = WAIT_FOR_ATTRIBUTE_VALUE;
 699  1963 quoteChar = 0;
 700  1963 attributeValueStart = -1;
 701  1963 advance();
 702  1963 break;
 703    }
 704   
 705    // Otherwise, an HTML style "bare" attribute (such as <select multiple>).
 706    // We aren't interested in those (we're just looking for the id or jwcid
 707    // attribute).
 708   
 709  90 state = WAIT_FOR_ATTRIBUTE_NAME;
 710  90 break;
 711   
 712  1966 case WAIT_FOR_ATTRIBUTE_VALUE:
 713   
 714  1966 if (ch == '/' || ch == '>')
 715  1 templateParseProblem(ParseMessages.missingAttributeValue(
 716    tagName,
 717    _line,
 718    attributeName), getCurrentLocation(), _line, _cursor);
 719   
 720    // Ignore whitespace between '=' and the attribute value. Also, look
 721    // for initial quote.
 722   
 723  1965 if (Character.isWhitespace(ch))
 724    {
 725  3 advance();
 726  3 break;
 727    }
 728   
 729  1962 if (ch == '\'' || ch == '"')
 730    {
 731  1961 quoteChar = ch;
 732   
 733  1961 state = COLLECT_QUOTED_VALUE;
 734  1961 advance();
 735  1961 attributeValueStart = _cursor;
 736  1961 attributeBeginEvent(attributeName, _line, attributeValueStart);
 737  1961 break;
 738    }
 739   
 740    // Not whitespace or quote, must be start of unquoted attribute.
 741   
 742  1 state = COLLECT_UNQUOTED_VALUE;
 743  1 attributeValueStart = _cursor;
 744  1 attributeBeginEvent(attributeName, _line, attributeValueStart);
 745  1 break;
 746   
 747  22914 case COLLECT_QUOTED_VALUE:
 748   
 749    // Start collecting the quoted attribute value. Stop at the matching quote
 750    // character,
 751    // unless bare, in which case, stop at the next whitespace.
 752   
 753  22914 if (ch == quoteChar)
 754    {
 755  1961 String attributeValue = new String(_templateData, attributeValueStart,
 756    _cursor - attributeValueStart);
 757   
 758  1961 attributeEndEvent(_cursor);
 759   
 760  1961 addAttributeIfUnique(tagName, attributeName, attributeValue);
 761   
 762   
 763    // Advance over the quote.
 764  1959 advance();
 765  1959 state = WAIT_FOR_ATTRIBUTE_NAME;
 766  1959 break;
 767    }
 768   
 769  20953 advance();
 770  20953 break;
 771   
 772  6 case COLLECT_UNQUOTED_VALUE:
 773   
 774    // An unquoted attribute value ends with whitespace
 775    // or the end of the enclosing tag.
 776   
 777  6 if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
 778    {
 779  1 String attributeValue = new String(_templateData, attributeValueStart,
 780    _cursor - attributeValueStart);
 781   
 782  1 attributeEndEvent(_cursor);
 783  1 addAttributeIfUnique(tagName, attributeName, attributeValue);
 784   
 785  1 state = WAIT_FOR_ATTRIBUTE_NAME;
 786  1 break;
 787    }
 788   
 789  5 advance();
 790  5 break;
 791    }
 792    }
 793   
 794  1734 tagEndEvent(_cursor);
 795   
 796    // Check for invisible localizations
 797   
 798  1734 String localizationKey = findValueCaselessly(LOCALIZATION_KEY_ATTRIBUTE_NAME, _attributes);
 799  1734 String jwcId = findValueCaselessly(_componentAttributeName, _attributes);
 800   
 801  1734 if (localizationKey != null && tagName.equalsIgnoreCase("span") && jwcId == null)
 802    {
 803  16 if (_ignoring)
 804  1 templateParseProblem(
 805    ParseMessages.componentMayNotBeIgnored(tagName, startLine),
 806    startLocation,
 807    startLine,
 808    cursorStart);
 809   
 810    // If the tag isn't empty, then create a Tag instance to ignore the
 811    // body of the tag.
 812   
 813  15 if (!emptyTag)
 814    {
 815  3 Tag tag = new Tag(tagName, startLine);
 816   
 817  3 tag._component = false;
 818  3 tag._removeTag = true;
 819  3 tag._ignoringBody = true;
 820  3 tag._mustBalance = true;
 821   
 822  3 _stack.add(tag);
 823   
 824    // Start ignoring content until the close tag.
 825   
 826  3 _ignoring = true;
 827    }
 828    else
 829    {
 830    // Cursor is at the closing carat, advance over it and any whitespace.
 831  12 advance();
 832  12 advanceOverWhitespace();
 833    }
 834   
 835    // End any open block.
 836   
 837  15 addTextToken(cursorStart - 1);
 838   
 839  15 boolean raw = checkBoolean(RAW_ATTRIBUTE_NAME, _attributes);
 840   
 841  15 Map attributes = filter(_attributes, new String[]
 842    { LOCALIZATION_KEY_ATTRIBUTE_NAME, RAW_ATTRIBUTE_NAME });
 843   
 844  15 TemplateToken token = _factory.createLocalizationToken(
 845    tagName,
 846    localizationKey,
 847    raw,
 848    attributes,
 849    startLocation);
 850   
 851  15 _tokens.add(token);
 852   
 853  15 return;
 854    }
 855   
 856  1718 if (jwcId != null)
 857    {
 858  975 processComponentStart(tagName, jwcId, emptyTag, startLine, cursorStart, startLocation);
 859  970 return;
 860    }
 861   
 862    // A static tag (not a tag without a jwcid attribute).
 863    // We need to record this so that we can match close tags later.
 864   
 865  743 if (!emptyTag)
 866    {
 867  671 Tag tag = new Tag(tagName, startLine);
 868  671 _stack.add(tag);
 869    }
 870   
 871    // If there wasn't an active block, then start one.
 872   
 873  743 if (_blockStart < 0 && !_ignoring)
 874  44 _blockStart = cursorStart;
 875   
 876  743 advance();
 877    }
 878   
 879    /**
 880    * @throws TemplateParseException
 881    * @since 4.0
 882    */
 883   
 884  1962 private void addAttributeIfUnique(String tagName, String attributeName, String attributeValue)
 885    throws TemplateParseException
 886    {
 887   
 888  1962 if (_attributes.containsKey(attributeName))
 889  2 templateParseProblem(
 890    ParseMessages.duplicateTagAttribute(tagName, _line, attributeName),
 891    getCurrentLocation(),
 892    _line,
 893    _cursor);
 894   
 895  1960 _attributes.put(attributeName, attributeValue);
 896    }
 897   
 898    /**
 899    * Processes a tag that is the open tag for a component (but also handles the $remove$ and
 900    * $content$ tags).
 901    */
 902   
 903    /**
 904    * Notify that the beginning of a tag has been detected.
 905    * <p>
 906    * Default implementation does nothing.
 907    */
 908  1738 protected void tagBeginEvent(int startLine, int cursorPosition)
 909    {
 910    }
 911   
 912    /**
 913    * Notify that the end of the current tag has been detected.
 914    * <p>
 915    * Default implementation does nothing.
 916    */
 917  1734 protected void tagEndEvent(int cursorPosition)
 918    {
 919    }
 920   
 921    /**
 922    * Notify that the beginning of an attribute value has been detected.
 923    * <p>
 924    * Default implementation does nothing.
 925    */
 926  1962 protected void attributeBeginEvent(String attributeName, int startLine, int cursorPosition)
 927    {
 928    }
 929   
 930    /**
 931    * Notify that the end of the current attribute value has been detected.
 932    * <p>
 933    * Default implementation does nothing.
 934    */
 935  1962 protected void attributeEndEvent(int cursorPosition)
 936    {
 937    }
 938   
 939  975 private void processComponentStart(String tagName, String jwcId, boolean emptyTag,
 940    int startLine, int cursorStart, Location startLocation) throws TemplateParseException
 941    {
 942  975 if (jwcId.equalsIgnoreCase(CONTENT_ID))
 943    {
 944  78 processContentTag(tagName, startLine, cursorStart, emptyTag);
 945   
 946  77 return;
 947    }
 948   
 949  897 boolean isRemoveId = jwcId.equalsIgnoreCase(REMOVE_ID);
 950   
 951  897 if (_ignoring && !isRemoveId)
 952  2 templateParseProblem(
 953    ParseMessages.componentMayNotBeIgnored(tagName, startLine),
 954    startLocation,
 955    startLine,
 956    cursorStart);
 957   
 958  895 String type = null;
 959  895 boolean allowBody = false;
 960   
 961  895 if (_patternMatcher.matches(jwcId, _implicitIdPattern))
 962    {
 963  509 MatchResult match = _patternMatcher.getMatch();
 964   
 965  509 jwcId = match.group(IMPLICIT_ID_PATTERN_ID_GROUP);
 966  509 type = match.group(IMPLICIT_ID_PATTERN_TYPE_GROUP);
 967   
 968  509 String libraryId = match.group(IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP);
 969  509 String simpleType = match.group(IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP);
 970   
 971    // If (and this is typical) no actual component id was specified,
 972    // then generate one on the fly.
 973    // The allocated id for anonymous components is
 974    // based on the simple (unprefixed) type, but starts
 975    // with a leading dollar sign to ensure no conflicts
 976    // with user defined component ids (which don't allow dollar signs
 977    // in the id).
 978   
 979  509 if (jwcId == null)
 980  442 jwcId = _idAllocator.allocateId("$" + simpleType);
 981   
 982  509 try
 983    {
 984  509 allowBody = _delegate.getAllowBody(libraryId, simpleType, startLocation);
 985    }
 986    catch (ApplicationRuntimeException e)
 987    {
 988    // give subclasses a chance to handle and rethrow
 989  0 templateParseProblem(e, startLine, cursorStart);
 990    }
 991   
 992    }
 993    else
 994    {
 995  386 if (!isRemoveId)
 996    {
 997  282 if (!_patternMatcher.matches(jwcId, _simpleIdPattern))
 998  0 templateParseProblem(
 999    ParseMessages.componentIdInvalid(tagName, startLine, jwcId),
 1000    startLocation,
 1001    startLine,
 1002    cursorStart);
 1003   
 1004  282 if (!_delegate.getKnownComponent(jwcId))
 1005  1 templateParseProblem(
 1006    ParseMessages.unknownComponentId(tagName, startLine, jwcId),
 1007    startLocation,
 1008    startLine,
 1009    cursorStart);
 1010   
 1011  281 try
 1012    {
 1013  281 allowBody = _delegate.getAllowBody(jwcId, startLocation);
 1014    }
 1015    catch (ApplicationRuntimeException e)
 1016    {
 1017    // give subclasses a chance to handle and rethrow
 1018  0 templateParseProblem(e, startLine, cursorStart);
 1019    }
 1020    }
 1021    }
 1022   
 1023    // Ignore the body if we're removing the entire tag,
 1024    // of if the corresponding component doesn't allow
 1025    // a body.
 1026   
 1027  894 boolean ignoreBody = !emptyTag && (isRemoveId || !allowBody);
 1028   
 1029  894 if (_ignoring && ignoreBody)
 1030  1 templateParseProblem(ParseMessages.nestedIgnore(tagName, startLine), new LocationImpl(
 1031    _resourceLocation, startLine), startLine, cursorStart);
 1032   
 1033  893 if (!emptyTag)
 1034  640 pushNewTag(tagName, startLine, isRemoveId, ignoreBody);
 1035   
 1036    // End any open block.
 1037   
 1038  893 addTextToken(cursorStart - 1);
 1039   
 1040  893 if (!isRemoveId)
 1041    {
 1042  790 addOpenToken(tagName, jwcId, type, startLocation);
 1043   
 1044  790 if (emptyTag)
 1045  253 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
 1046    }
 1047   
 1048  893 advance();
 1049    }
 1050   
 1051  640 private void pushNewTag(String tagName, int startLine, boolean isRemoveId, boolean ignoreBody)
 1052    {
 1053  640 Tag tag = new Tag(tagName, startLine);
 1054   
 1055  640 tag._component = !isRemoveId;
 1056  640 tag._removeTag = isRemoveId;
 1057   
 1058  640 tag._ignoringBody = ignoreBody;
 1059   
 1060  640 _ignoring = tag._ignoringBody;
 1061   
 1062  640 tag._mustBalance = true;
 1063   
 1064  640 _stack.add(tag);
 1065    }
 1066   
 1067  78 private void processContentTag(String tagName, int startLine, int cursorStart, boolean emptyTag)
 1068    throws TemplateParseException
 1069    {
 1070  78 if (_ignoring)
 1071  1 templateParseProblem(
 1072    ParseMessages.contentBlockMayNotBeIgnored(tagName, startLine),
 1073    new LocationImpl(_resourceLocation, startLine),
 1074    startLine,
 1075    cursorStart);
 1076   
 1077  77 if (emptyTag)
 1078  0 templateParseProblem(
 1079    ParseMessages.contentBlockMayNotBeEmpty(tagName, startLine),
 1080    new LocationImpl(_resourceLocation, startLine),
 1081    startLine,
 1082    cursorStart);
 1083   
 1084  77 _tokens.clear();
 1085  77 _blockStart = -1;
 1086   
 1087  77 Tag tag = new Tag(tagName, startLine);
 1088   
 1089  77 tag._mustBalance = true;
 1090  77 tag._content = true;
 1091   
 1092  77 _stack.clear();
 1093  77 _stack.add(tag);
 1094   
 1095  77 advance();
 1096    }
 1097   
 1098  790 private void addOpenToken(String tagName, String jwcId, String type, Location location)
 1099    {
 1100  790 OpenToken token = _factory.createOpenToken(tagName, jwcId, type, location);
 1101  790 _tokens.add(token);
 1102   
 1103  790 if (_attributes.isEmpty())
 1104  0 return;
 1105   
 1106  790 Iterator i = _attributes.entrySet().iterator();
 1107  790 while (i.hasNext())
 1108    {
 1109  1413 Map.Entry entry = (Map.Entry) i.next();
 1110   
 1111  1413 String key = (String) entry.getKey();
 1112   
 1113  1413 if (key.equalsIgnoreCase(_componentAttributeName))
 1114  790 continue;
 1115   
 1116  623 String value = (String) entry.getValue();
 1117   
 1118  623 addAttributeToToken(token, key, value);
 1119    }
 1120    }
 1121   
 1122    /**
 1123    * Adds the attribute to the token (identifying prefixes and whatnot is now done downstream).
 1124    *
 1125    * @since 3.0
 1126    */
 1127   
 1128  623 private void addAttributeToToken(OpenToken token, String name, String attributeValue)
 1129    {
 1130  623 token.addAttribute(name, convertEntitiesToPlain(attributeValue));
 1131    }
 1132   
 1133    /**
 1134    * Invoked to handle a closing tag, i.e., &lt;/foo&gt;. When a tag closes, it will match against
 1135    * a tag on the open tag start. Preferably the top tag on the stack (if everything is well
 1136    * balanced), but this is HTML, not XML, so many tags won't balance.
 1137    * <p>
 1138    * Once the matching tag is located, the question is ... is the tag dynamic or static? If
 1139    * static, then the current text block is extended to include this close tag. If dynamic, then
 1140    * the current text block is ended (before the '&lt;' that starts the tag) and a close token is
 1141    * added.
 1142    * <p>
 1143    * In either case, the matching static element and anything above it is removed, and the cursor
 1144    * is left on the character following the '&gt;'.
 1145    */
 1146   
 1147  1238 private void closeTag() throws TemplateParseException
 1148    {
 1149  1238 int cursorStart = _cursor;
 1150  1238 int length = _templateData.length;
 1151  1238 int startLine = _line;
 1152   
 1153  1238 Location startLocation = getCurrentLocation();
 1154   
 1155  1238 _cursor += CLOSE_TAG.length;
 1156   
 1157  1238 int tagStart = _cursor;
 1158   
 1159  1238 while (true)
 1160    {
 1161  4997 if (_cursor >= length)
 1162  1 templateParseProblem(
 1163    ParseMessages.incompleteCloseTag(startLine),
 1164    startLocation,
 1165    startLine,
 1166    cursorStart);
 1167   
 1168  4996 char ch = _templateData[_cursor];
 1169   
 1170  4996 if (ch == '>')
 1171  1237 break;
 1172   
 1173  3759 advance();
 1174    }
 1175   
 1176  1237 String tagName = new String(_templateData, tagStart, _cursor - tagStart);
 1177   
 1178  1237 int stackPos = _stack.size() - 1;
 1179  1237 Tag tag = null;
 1180   
 1181  1237 while (stackPos >= 0)
 1182    {
 1183  1309 tag = (Tag) _stack.get(stackPos);
 1184   
 1185  1309 if (tag.match(tagName))
 1186  1235 break;
 1187   
 1188  74 if (tag._mustBalance)
 1189  1 templateParseProblem(ParseMessages.improperlyNestedCloseTag(
 1190    tagName,
 1191    startLine,
 1192    tag._tagName,
 1193    tag._line), startLocation, startLine, cursorStart);
 1194   
 1195  73 stackPos--;
 1196    }
 1197   
 1198  1236 if (stackPos < 0)
 1199  1 templateParseProblem(
 1200    ParseMessages.unmatchedCloseTag(tagName, startLine),
 1201    startLocation,
 1202    startLine,
 1203    cursorStart);
 1204   
 1205    // Special case for the content tag
 1206   
 1207  1235 if (tag._content)
 1208    {
 1209  76 addTextToken(cursorStart - 1);
 1210   
 1211    // Advance the cursor right to the end.
 1212   
 1213  76 _cursor = length;
 1214  76 _stack.clear();
 1215  76 return;
 1216    }
 1217   
 1218    // When a component closes, add a CLOSE tag.
 1219  1159 if (tag._component)
 1220    {
 1221  533 addTextToken(cursorStart - 1);
 1222   
 1223  533 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
 1224    }
 1225    else
 1226    {
 1227    // The close of a static tag. Unless removing the tag
 1228    // entirely, make sure the block tag is part of a text block.
 1229   
 1230  626 if (_blockStart < 0 && !tag._removeTag && !_ignoring)
 1231  104 _blockStart = cursorStart;
 1232    }
 1233   
 1234    // Remove all elements at stackPos or above.
 1235   
 1236  1159 for (int i = _stack.size() - 1; i >= stackPos; i--)
 1237  1215 _stack.remove(i);
 1238   
 1239    // Advance cursor past '>'
 1240   
 1241  1159 advance();
 1242   
 1243    // If editting out the tag (i.e., $remove$) then kill any whitespace.
 1244    // For components that simply don't contain a body, removeTag will
 1245    // be false.
 1246   
 1247  1159 if (tag._removeTag)
 1248  101 advanceOverWhitespace();
 1249   
 1250    // If we were ignoring the body of the tag, then clear the ignoring
 1251    // flag, since we're out of the body.
 1252   
 1253  1159 if (tag._ignoringBody)
 1254  222 _ignoring = false;
 1255    }
 1256   
 1257    /**
 1258    * Advances the cursor to the next character. If the end-of-line is reached, then increments the
 1259    * line counter.
 1260    */
 1261   
 1262  86229 private void advance()
 1263    {
 1264  86229 int length = _templateData.length;
 1265   
 1266  86229 if (_cursor >= length)
 1267  0 return;
 1268   
 1269  86229 char ch = _templateData[_cursor];
 1270   
 1271  86229 _cursor++;
 1272   
 1273  86229 if (ch == '\n')
 1274    {
 1275  269 _line++;
 1276  269 _currentLocation = null;
 1277  269 return;
 1278    }
 1279   
 1280    // A \r, or a \r\n also counts as a new line.
 1281   
 1282  85960 if (ch == '\r')
 1283    {
 1284  2991 _line++;
 1285  2991 _currentLocation = null;
 1286   
 1287  2991 if (_cursor < length && _templateData[_cursor] == '\n')
 1288  2990 _cursor++;
 1289   
 1290  2991 return;
 1291    }
 1292   
 1293    // Not an end-of-line character.
 1294   
 1295    }
 1296   
 1297  188 private void advanceOverWhitespace()
 1298    {
 1299  188 int length = _templateData.length;
 1300   
 1301  188 while (_cursor < length)
 1302    {
 1303  921 char ch = _templateData[_cursor];
 1304  921 if (!Character.isWhitespace(ch))
 1305  183 return;
 1306   
 1307  738 advance();
 1308    }
 1309    }
 1310   
 1311    /**
 1312    * Returns a new Map that is a copy of the input Map with some key/value pairs removed. A list
 1313    * of keys is passed in and matching keys (caseless comparison) from the input Map are excluded
 1314    * from the output map. May return null (rather than return an empty Map).
 1315    */
 1316   
 1317  15 private Map filter(Map input, String[] removeKeys)
 1318    {
 1319  15 if (input == null || input.isEmpty())
 1320  0 return null;
 1321   
 1322  15 Map result = null;
 1323   
 1324  15 Iterator i = input.entrySet().iterator();
 1325   
 1326  15 nextkey: while (i.hasNext())
 1327    {
 1328  20 Map.Entry entry = (Map.Entry) i.next();
 1329   
 1330  20 String key = (String) entry.getKey();
 1331   
 1332  20 for (int j = 0; j < removeKeys.length; j++)
 1333    {
 1334  25 if (key.equalsIgnoreCase(removeKeys[j]))
 1335  17 continue nextkey;
 1336    }
 1337   
 1338  3 if (result == null)
 1339  2 result = new HashMap(input.size());
 1340   
 1341  3 result.put(key, entry.getValue());
 1342    }
 1343   
 1344  15 return result;
 1345    }
 1346   
 1347    /**
 1348    * Searches a Map for given key, caselessly. The Map is expected to consist of Strings for keys
 1349    * and values. Returns the value for the first key found that matches (caselessly) the input
 1350    * key. Returns null if no value found.
 1351    */
 1352   
 1353  3483 protected String findValueCaselessly(String key, Map map)
 1354    {
 1355  3483 String result = (String) map.get(key);
 1356   
 1357  3483 if (result != null)
 1358  990 return result;
 1359   
 1360  2493 Iterator i = map.entrySet().iterator();
 1361  2493 while (i.hasNext())
 1362    {
 1363  2249 Map.Entry entry = (Map.Entry) i.next();
 1364   
 1365  2249 String entryKey = (String) entry.getKey();
 1366   
 1367  2249 if (entryKey.equalsIgnoreCase(key))
 1368  3 return (String) entry.getValue();
 1369    }
 1370   
 1371  2490 return null;
 1372    }
 1373   
 1374    /**
 1375    * Conversions needed by {@link #convertEntitiesToPlain(String)}
 1376    */
 1377   
 1378    private static final String[] CONVERSIONS =
 1379    { "&lt;", "<", "&gt;", ">", "&quot;", "\"", "&amp;", "&" };
 1380   
 1381    /**
 1382    * Provided a raw input string that has been recognized to be an expression, this removes excess
 1383    * white space and converts &amp;amp;;, &amp;quot;; &amp;lt;; and &amp;gt;; to their normal
 1384    * character values (otherwise its impossible to specify those values in expressions in the
 1385    * template).
 1386    */
 1387   
 1388  623 private String convertEntitiesToPlain(String input)
 1389    {
 1390  623 int inputLength = input.length();
 1391   
 1392  623 StringBuffer buffer = new StringBuffer(inputLength);
 1393   
 1394  623 int cursor = 0;
 1395   
 1396  623 outer: while (cursor < inputLength)
 1397    {
 1398  9130 for (int i = 0; i < CONVERSIONS.length; i += 2)
 1399    {
 1400  36503 String entity = CONVERSIONS[i];
 1401  36503 int entityLength = entity.length();
 1402  36503 String value = CONVERSIONS[i + 1];
 1403   
 1404  36503 if (cursor + entityLength > inputLength)
 1405  8799 continue;
 1406   
 1407  27704 if (input.substring(cursor, cursor + entityLength).equals(entity))
 1408    {
 1409  15 buffer.append(value);
 1410  15 cursor += entityLength;
 1411  15 continue outer;
 1412    }
 1413    }
 1414   
 1415  9115 buffer.append(input.charAt(cursor));
 1416  9115 cursor++;
 1417    }
 1418   
 1419  623 return buffer.toString().trim();
 1420    }
 1421   
 1422    /**
 1423    * Returns true if the map contains the given key (caseless search) and the value is "true"
 1424    * (caseless comparison).
 1425    */
 1426   
 1427  15 private boolean checkBoolean(String key, Map map)
 1428    {
 1429  15 String value = findValueCaselessly(key, map);
 1430   
 1431  15 if (value == null)
 1432  13 return false;
 1433   
 1434  2 return value.equalsIgnoreCase("true");
 1435    }
 1436   
 1437    /**
 1438    * Gets the current location within the file. This allows the location to be created only as
 1439    * needed, and multiple objects on the same line can share the same Location instance.
 1440    *
 1441    * @since 3.0
 1442    */
 1443   
 1444  2027 protected Location getCurrentLocation()
 1445    {
 1446  2027 if (_currentLocation == null)
 1447  1339 _currentLocation = new LocationImpl(_resourceLocation, _line);
 1448   
 1449  2027 return _currentLocation;
 1450    }
 1451   
 1452  127 public void setFactory(TemplateTokenFactory factory)
 1453    {
 1454  127 _factory = factory;
 1455    }
 1456   
 1457    }