001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.HashSet;
020import java.util.Set;
021import java.util.concurrent.ThreadLocalRandom;
022
023import org.apache.commons.lang3.Validate;
024
025/**
026 * <p>
027 * Generates random Unicode strings containing the specified number of code points.
028 * Instances are created using a builder class, which allows the
029 * callers to define the properties of the generator. See the documentation for the
030 * {@link Builder} class to see available properties.
031 * </p>
032 * <pre>
033 * // Generates a 20 code point string, using only the letters a-z
034 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
035 *     .withinRange('a', 'z').build();
036 * String randomLetters = generator.generate(20);
037 * </pre>
038 * <pre>
039 * // Using Apache Commons RNG for randomness
040 * UniformRandomProvider rng = RandomSource.create(...);
041 * // Generates a 20 code point string, using only the letters a-z
042 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
043 *     .withinRange('a', 'z')
044 *     .usingRandom(rng::nextInt) // uses Java 8 syntax
045 *     .build();
046 * String randomLetters = generator.generate(20);
047 * </pre>
048 * <p>
049 * {@code RandomStringBuilder} instances are immutable and thread-safe.
050 * </p>
051 * @since 1.1
052 */
053public final class RandomStringGenerator {
054
055    /**
056     * The smallest allowed code point (inclusive).
057     */
058    private final int minimumCodePoint;
059
060    /**
061     * The largest allowed code point (inclusive).
062     */
063    private final int maximumCodePoint;
064
065    /**
066     * Filters for code points.
067     */
068    private final Set<CharacterPredicate> inclusivePredicates;
069
070    /**
071     * The source of randomness for this generator.
072     */
073    private final TextRandomProvider random;
074
075    /**
076     * Constructs the generator.
077     *
078     * @param minimumCodePoint
079     *            smallest allowed code point (inclusive)
080     * @param maximumCodePoint
081     *            largest allowed code point (inclusive)
082     * @param inclusivePredicates
083     *            filters for code points
084     * @param random
085     *            source of randomness
086     */
087    private RandomStringGenerator(int minimumCodePoint, int maximumCodePoint,
088            Set<CharacterPredicate> inclusivePredicates, TextRandomProvider random) {
089        this.minimumCodePoint = minimumCodePoint;
090        this.maximumCodePoint = maximumCodePoint;
091        this.inclusivePredicates = inclusivePredicates;
092        this.random = random;
093    }
094
095    /**
096     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
097     * or the user-supplied source of randomness.
098     *
099     * @param minInclusive
100     *            the minimum value allowed
101     * @param maxInclusive
102     *            the maximum value allowed
103     * @return the random number.
104     */
105    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
106        if (random != null) {
107            return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
108        }
109        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
110    }
111
112    /**
113     * <p>
114     * Generates a random string, containing the specified number of code points.
115     * </p>
116     * <p>Code points are randomly selected between the minimum and maximum values defined
117     * in the generator.
118     * Surrogate and private use characters are not returned, although the
119     * resulting string may contain pairs of surrogates that together encode a
120     * supplementary character.
121     * </p>
122     * <p>
123     * Note: the number of {@code char} code units generated will exceed
124     * {@code length} if the string contains supplementary characters. See the
125     * {@link Character} documentation to understand how Java stores Unicode
126     * values.
127     * </p>
128     *
129     * @param length
130     *            the number of code points to generate
131     * @return the generated string
132     * @throws IllegalArgumentException
133     *             if {@code length < 0}
134     */
135    public String generate(final int length) {
136        if (length == 0) {
137            return "";
138        }
139        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
140
141        final StringBuilder builder = new StringBuilder(length);
142        long remaining = length;
143
144        do {
145            int codePoint = generateRandomNumber(minimumCodePoint, maximumCodePoint);
146
147            switch (Character.getType(codePoint)) {
148            case Character.UNASSIGNED:
149            case Character.PRIVATE_USE:
150            case Character.SURROGATE:
151                continue;
152            default:
153            }
154
155            if (inclusivePredicates != null) {
156                boolean matchedFilter = false;
157                for (CharacterPredicate predicate : inclusivePredicates) {
158                    if (predicate.test(codePoint)) {
159                        matchedFilter = true;
160                        break;
161                    }
162                }
163                if (!matchedFilter) {
164                    continue;
165                }
166            }
167
168            builder.appendCodePoint(codePoint);
169            remaining--;
170
171        } while (remaining != 0);
172
173        return builder.toString();
174    }
175
176    /**
177     * <p>A builder for generating {@code RandomStringGenerator} instances.</p>
178     * <p>The behaviour of a generator is controlled by properties set by this
179     * builder. Each property has a default value, which can be overridden by
180     * calling the methods defined in this class, prior to calling {@link #build()}.</p>
181     *
182     * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
183     *
184     * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
185     * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
186     *
187     * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
188     * otherwise {@link ThreadLocalRandom} is used.</p>
189     *
190     * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
191     * which defines a collection of tests that are applied to the randomly generated code points.
192     * The code points will only be included in the result if they pass at least one of the tests.
193     * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
194     *
195     * <p>This class is not thread safe.</p>
196     * @since 1.1
197     */
198    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {
199
200        /**
201         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
202         * ({@value}).
203         */
204        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
205
206        /**
207         * The default string length produced by this builder: {@value}.
208         */
209        public static final int DEFAULT_LENGTH = 0;
210
211        /**
212         * The default minimum code point allowed: {@value}.
213         */
214        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
215
216        /**
217         * The minimum code point allowed.
218         */
219        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;
220
221        /**
222         * The maximum code point allowed.
223         */
224        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;
225
226        /**
227         * Filters for code points.
228         */
229        private Set<CharacterPredicate> inclusivePredicates;
230
231        /**
232         * The source of randomness.
233         */
234        private TextRandomProvider random;
235
236        /**
237         * <p>
238         * Specifies the minimum and maximum code points allowed in the
239         * generated string.
240         * </p>
241         *
242         * @param minimumCodePoint
243         *            the smallest code point allowed (inclusive)
244         * @param maximumCodePoint
245         *            the largest code point allowed (inclusive)
246         * @return {@code this}, to allow method chaining
247         * @throws IllegalArgumentException
248         *             if {@code maximumCodePoint >}
249         *             {@link Character#MAX_CODE_POINT}
250         * @throws IllegalArgumentException
251         *             if {@code minimumCodePoint < 0}
252         * @throws IllegalArgumentException
253         *             if {@code minimumCodePoint > maximumCodePoint}
254         */
255        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
256            Validate.isTrue(minimumCodePoint <= maximumCodePoint,
257                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
258            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
259            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
260                    "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
261
262            this.minimumCodePoint = minimumCodePoint;
263            this.maximumCodePoint = maximumCodePoint;
264            return this;
265        }
266
267        /**
268         * <p>
269         * Limits the characters in the generated string to those that match at
270         * least one of the predicates supplied.
271         * </p>
272         *
273         * <p>
274         * Passing {@code null} or an empty array to this method will revert to the
275         * default behaviour of allowing any character. Multiple calls to this
276         * method will replace the previously stored predicates.
277         * </p>
278         *
279         * @param predicates
280         *            the predicates, may be {@code null} or empty
281         * @return {@code this}, to allow method chaining
282         */
283        public Builder filteredBy(final CharacterPredicate... predicates) {
284            if (predicates == null || predicates.length == 0) {
285                inclusivePredicates = null;
286                return this;
287            }
288
289            if (inclusivePredicates == null) {
290                inclusivePredicates = new HashSet<>();
291            } else {
292                inclusivePredicates.clear();
293            }
294
295            for (CharacterPredicate predicate : predicates) {
296                inclusivePredicates.add(predicate);
297            }
298
299            return this;
300        }
301
302        /**
303         * <p>
304         * Overrides the default source of randomness.  It is highly
305         * recommended that a random number generator library like
306         * <a href="http://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
307         * be used to provide the random number generation.
308         * </p>
309         *
310         * <p>
311         * When using Java 8 or later, {@link TextRandomProvider} is a
312         * functional interface and need not be explicitly implemented:
313         * </p>
314         * <pre>
315         * {@code
316         * UniformRandomProvider rng = RandomSource.create(...);
317         * RandomStringGenerator gen = new RandomStringGenerator.Builder()
318         *     .usingRandom(rng::nextInt)
319         *     // additional builder calls as needed
320         *     .build();
321         * }
322         * </pre>
323         *
324         * <p>
325         * Passing {@code null} to this method will revert to the default source of
326         * randomness.
327         * </p>
328         *
329         * @param random
330         *            the source of randomness, may be {@code null}
331         * @return {@code this}, to allow method chaining
332         */
333        public Builder usingRandom(final TextRandomProvider random) {
334            this.random = random;
335            return this;
336        }
337
338        /**
339         * <p>Builds the {@code RandomStringGenerator} using the properties specified.</p>
340         * @return the configured {@code RandomStringGenerator}
341         */
342        @Override
343        public RandomStringGenerator build() {
344            return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates, random);
345        }
346    }
347}