001 // Copyright 2004, 2005 The Apache Software Foundation 002 // 003 // Licensed under the Apache License, Version 2.0 (the "License"); 004 // you may not use this file except in compliance with the License. 005 // You may obtain a copy of the License at 006 // 007 // http://www.apache.org/licenses/LICENSE-2.0 008 // 009 // Unless required by applicable law or agreed to in writing, software 010 // distributed under the License is distributed on an "AS IS" BASIS, 011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 // See the License for the specific language governing permissions and 013 // limitations under the License. 014 015 package org.apache.tapestry.util.text; 016 017 /** 018 * An object that encodes a character according to rules of the HTML specification, 019 * so that it will be properly parsed by a browser irrespectively of the character 020 * encoding used in the HTML output. 021 * 022 * @author mb 023 * @since 4.0 024 */ 025 public class MarkupCharacterTranslator implements ICharacterTranslator 026 { 027 private static final String SAFE_CHARACTERS = 028 "01234567890" 029 + "abcdefghijklmnopqrstuvwxyz" 030 + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 031 + "\t\n\r !#$%'()*+,-./:;=?@[\\]^_`{|}~"; 032 033 private static final String[][] ENTITIES = { 034 { "\"", """ }, 035 { "<", "<" }, 036 { ">", ">" }, 037 { "&", "&" } 038 }; 039 040 private static final ICharacterMatcher SAFE_MATCHER = new AsciiCharacterMatcher(SAFE_CHARACTERS); 041 private static final ICharacterTranslator ENTITY_TRANSLATOR = new AsciiCharacterTranslator(ENTITIES); 042 043 private boolean _encodeNonAscii; 044 private ICharacterMatcher _safeMatcher; 045 private ICharacterTranslator _entityTranslator; 046 047 public MarkupCharacterTranslator() 048 { 049 this(true); 050 } 051 052 public MarkupCharacterTranslator(boolean encodeNonAscii) 053 { 054 this(encodeNonAscii, SAFE_MATCHER, ENTITY_TRANSLATOR); 055 } 056 057 public MarkupCharacterTranslator(boolean encodeNonAscii, ICharacterMatcher safeMatcher, ICharacterTranslator entityTranslator) 058 { 059 _encodeNonAscii = encodeNonAscii; 060 _safeMatcher = safeMatcher; 061 _entityTranslator = entityTranslator; 062 } 063 064 public MarkupCharacterTranslator(boolean encodeNonAscii, String safeCharacters, String[][] entities) 065 { 066 _encodeNonAscii = encodeNonAscii; 067 _safeMatcher = new AsciiCharacterMatcher(safeCharacters); 068 _entityTranslator = new AsciiCharacterTranslator(entities); 069 } 070 071 /** 072 * @see org.apache.tapestry.util.text.IMarkupCharacterTranslator#translateAttribute(char) 073 */ 074 public String translate(char ch) { 075 if (ch >= 128 && !_encodeNonAscii) 076 return null; 077 078 if (_safeMatcher.matches(ch)) 079 return null; 080 081 String entity = _entityTranslator.translate(ch); 082 if (entity != null) 083 return entity; 084 085 // needs to use a NumberFormat here to be fully compliant, 086 // but this is accepted fine by the browsers 087 return "&#" + (int) ch + ";"; 088 } 089 }