/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;

import org.apache.commons.lang3.Validate;

/**
 * <p>
 * Generates random Unicode strings containing the specified number of code points.
 * Instances are created using a builder class, which allows the
 * callers to define the properties of the generator. See the documentation for the
 * {@link Builder} class to see available properties.
 * </p>
 * <pre>
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
 *     .withinRange('a', 'z').build();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <pre>
 * // Using Apache Commons RNG for randomness
 * UniformRandomProvider rng = RandomSource.create(...);
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
 *     .withinRange('a', 'z')
 *     .usingRandom(rng::nextInt) // uses Java 8 syntax
 *     .build();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <p>
 * {@code RandomStringGenerator} instances never modify their own state after
 * construction and keep a private copy of the predicate set they were built with,
 * so later changes to the {@link Builder} do not affect them. Whether concurrent
 * calls to {@link #generate(int)} are safe additionally depends on the supplied
 * {@link TextRandomProvider} and {@link CharacterPredicate} implementations.
 * </p>
 * @since 1.1
 */
public final class RandomStringGenerator {

    /**
     * The smallest allowed code point (inclusive).
     */
    private final int minimumCodePoint;

    /**
     * The largest allowed code point (inclusive).
     */
    private final int maximumCodePoint;

    /**
     * Filters for code points; {@code null} means that no filtering is applied.
     * This set is a private copy and is never modified after construction.
     */
    private final Set<CharacterPredicate> inclusivePredicates;

    /**
     * The source of randomness for this generator; {@code null} means that
     * {@link ThreadLocalRandom} is used.
     */
    private final TextRandomProvider random;

    /**
     * Constructs the generator.
     *
     * @param minimumCodePoint
     *            smallest allowed code point (inclusive)
     * @param maximumCodePoint
     *            largest allowed code point (inclusive)
     * @param inclusivePredicates
     *            filters for code points, may be {@code null}; the set is copied
     * @param random
     *            source of randomness, may be {@code null}
     */
    private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint,
            final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random) {
        this.minimumCodePoint = minimumCodePoint;
        this.maximumCodePoint = maximumCodePoint;
        // Defensive copy: the builder keeps its own reference to the set and may be
        // reused after build(); sharing the instance would let later builder calls
        // change the behaviour of an already-built generator.
        this.inclusivePredicates = inclusivePredicates == null ? null : new HashSet<>(inclusivePredicates);
        this.random = random;
    }

    /**
     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
     * or the user-supplied source of randomness.
     *
     * @param minInclusive
     *            the minimum value allowed
     * @param maxInclusive
     *            the maximum value allowed
     * @return the random number.
     */
    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
        if (random != null) {
            // The provider yields a value in [0, bound); shift it into [min, max].
            return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
        }
        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
    }

    /**
     * Tests a code point against the configured predicates.
     *
     * @param codePoint
     *            the candidate code point
     * @return {@code true} if no predicates are configured or at least one of them
     *         accepts {@code codePoint}
     */
    private boolean isAcceptedByPredicates(final int codePoint) {
        if (inclusivePredicates == null) {
            return true;
        }
        for (final CharacterPredicate predicate : inclusivePredicates) {
            if (predicate.test(codePoint)) {
                return true;
            }
        }
        return false;
    }

    /**
     * <p>
     * Generates a random string, containing the specified number of code points.
     * </p>
     * <p>Code points are randomly selected between the minimum and maximum values defined
     * in the generator.
     * Unassigned, surrogate and private use characters are not returned, although the
     * resulting string may contain pairs of surrogates that together encode a
     * supplementary character.
     * </p>
     * <p>
     * Note: the number of {@code char} code units generated will exceed
     * {@code length} if the string contains supplementary characters. See the
     * {@link Character} documentation to understand how Java stores Unicode
     * values.
     * </p>
     * <p>
     * Note: rejected candidates are simply redrawn, so this method does not return
     * if no code point in the configured range is both of an allowed type and
     * accepted by at least one predicate.
     * </p>
     *
     * @param length
     *            the number of code points to generate
     * @return the generated string
     * @throws IllegalArgumentException
     *             if {@code length < 0}
     */
    public String generate(final int length) {
        if (length == 0) {
            return "";
        }
        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);

        final StringBuilder builder = new StringBuilder(length);
        int remaining = length;

        while (remaining > 0) {
            final int codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);

            switch (Character.getType(codePoint)) {
            case Character.UNASSIGNED:
            case Character.PRIVATE_USE:
            case Character.SURROGATE:
                // Not a usable character on its own: draw again.
                continue;
            default:
            }

            if (!isAcceptedByPredicates(codePoint)) {
                continue;
            }

            builder.appendCodePoint(codePoint);
            remaining--;
        }

        return builder.toString();
    }

    /**
     * <p>A builder for generating {@code RandomStringGenerator} instances.</p>
     * <p>The behaviour of a generator is controlled by properties set by this
     * builder. Each property has a default value, which can be overridden by
     * calling the methods defined in this class, prior to calling {@link #build()}.</p>
     *
     * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
     *
     * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
     * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
     *
     * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
     * otherwise {@link ThreadLocalRandom} is used.</p>
     *
     * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
     * which defines a collection of tests that are applied to the randomly generated code points.
     * The code points will only be included in the result if they pass at least one of the tests.
     * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
     *
     * <p>This class is not thread safe.</p>
     * @since 1.1
     */
    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {

        /**
         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
         * ({@value}).
         */
        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;

        /**
         * The default string length produced by this builder: {@value}.
         */
        public static final int DEFAULT_LENGTH = 0;

        /**
         * The default minimum code point allowed: {@value}.
         */
        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;

        /**
         * The minimum code point allowed.
         */
        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;

        /**
         * The maximum code point allowed.
         */
        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;

        /**
         * Filters for code points; {@code null} means that no filtering is applied.
         */
        private Set<CharacterPredicate> inclusivePredicates;

        /**
         * The source of randomness; {@code null} selects the default source.
         */
        private TextRandomProvider random;

        /**
         * <p>
         * Specifies the minimum and maximum code points allowed in the
         * generated string.
         * </p>
         *
         * @param minimumCodePoint
         *            the smallest code point allowed (inclusive)
         * @param maximumCodePoint
         *            the largest code point allowed (inclusive)
         * @return {@code this}, to allow method chaining
         * @throws IllegalArgumentException
         *             if {@code maximumCodePoint >}
         *             {@link Character#MAX_CODE_POINT}
         * @throws IllegalArgumentException
         *             if {@code minimumCodePoint < 0}
         * @throws IllegalArgumentException
         *             if {@code minimumCodePoint > maximumCodePoint}
         */
        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
            Validate.isTrue(minimumCodePoint <= maximumCodePoint,
                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
                    "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);

            this.minimumCodePoint = minimumCodePoint;
            this.maximumCodePoint = maximumCodePoint;
            return this;
        }

        /**
         * <p>
         * Limits the characters in the generated string to those that match at
         * least one of the predicates supplied.
         * </p>
         *
         * <p>
         * Passing {@code null} or an empty array to this method will revert to the
         * default behaviour of allowing any character. Multiple calls to this
         * method will replace the previously stored predicates.
         * </p>
         *
         * @param predicates
         *            the predicates, may be {@code null} or empty
         * @return {@code this}, to allow method chaining
         */
        public Builder filteredBy(final CharacterPredicate... predicates) {
            if (predicates == null || predicates.length == 0) {
                inclusivePredicates = null;
                return this;
            }

            // Build a fresh set rather than clearing the previous one in place, so a
            // set that has already been handed out is never mutated.
            final Set<CharacterPredicate> replacement = new HashSet<>();
            for (final CharacterPredicate predicate : predicates) {
                replacement.add(predicate);
            }
            inclusivePredicates = replacement;

            return this;
        }

        /**
         * <p>
         * Overrides the default source of randomness. It is highly
         * recommended that a random number generator library like
         * <a href="http://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
         * be used to provide the random number generation.
         * </p>
         *
         * <p>
         * When using Java 8 or later, {@link TextRandomProvider} is a
         * functional interface and need not be explicitly implemented:
         * </p>
         * <pre>
         * {@code
         *     UniformRandomProvider rng = RandomSource.create(...);
         *     RandomStringGenerator gen = new RandomStringGenerator.Builder()
         *         .usingRandom(rng::nextInt)
         *         // additional builder calls as needed
         *         .build();
         * }
         * </pre>
         *
         * <p>
         * Passing {@code null} to this method will revert to the default source of
         * randomness.
         * </p>
         *
         * @param random
         *            the source of randomness, may be {@code null}
         * @return {@code this}, to allow method chaining
         */
        public Builder usingRandom(final TextRandomProvider random) {
            this.random = random;
            return this;
        }

        /**
         * <p>Builds the {@code RandomStringGenerator} using the properties specified.</p>
         * <p>The generator takes a snapshot of the current properties; subsequent calls
         * on this builder do not affect generators that have already been built.</p>
         * @return the configured {@code RandomStringGenerator}
         */
        @Override
        public RandomStringGenerator build() {
            return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates, random);
        }
    }
}