/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

/**
 * <p>Operations on Strings that contain words.</p>
 *
 * <p>This class tries to handle <code>null</code> input gracefully.
 * An exception will not be thrown for a <code>null</code> input.
 * Each method documents its behaviour in more detail.</p>
 *
 * @since 1.1
 */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        super();
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1" summary="Wrap Results">
     *  <tr>
     *   <th>input</th>
     *   <th>wrapLength</th>
     *   <th>result</th>
     *  </tr>
     *  <tr>
     *   <td>null</td>
     *   <td>*</td>
     *   <td>null</td>
     *  </tr>
     *  <tr>
     *   <td>""</td>
     *   <td>*</td>
     *   <td>""</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here, http://commons.apache.org, to jump to the commons website"</td>
     *   <td>20</td>
     *   <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"</td>
     *  </tr>
     * </table>
     *
     * (assuming that '\n' is the systems line separator)
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str, final int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1" summary="Wrap Results">
     *  <tr>
     *   <th>input</th>
     *   <th>wrapLength</th>
     *   <th>newLineString</th>
     *   <th>wrapLongWords</th>
     *   <th>result</th>
     *  </tr>
     *  <tr>
     *   <td>null</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>null</td>
     *  </tr>
     *  <tr>
     *   <td>""</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>""</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"&lt;br /&gt;"</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped
     *   after&lt;br /&gt;20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>null</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of" + systemNewLine + "text that is going"
     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>false</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
     *  </tr>
     * </table>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str,
                              final int wrapLength,
                              final String newLineStr,
                              final boolean wrapLongWords) {
        return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>wrapOn</code>.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <p>Matches of <code>wrapOn</code> that are empty (zero characters long) are ignored:
     * only a match that consumes at least one character is treated as a break point.</p>
     *
     * <table border="1" summary="Wrap Results">
     *  <tr>
     *   <th>input</th>
     *   <th>wrapLength</th>
     *   <th>newLineString</th>
     *   <th>wrapLongWords</th>
     *   <th>wrapOn</th>
     *   <th>result</th>
     *  </tr>
     *  <tr>
     *   <td>null</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>*</td>
     *   <td>null</td>
     *  </tr>
     *  <tr>
     *   <td>""</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>*</td>
     *   <td>""</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true/false</td>
     *   <td>" "</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"&lt;br /&gt;"</td>
     *   <td>true/false</td>
     *   <td>" "</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped
     *   after&lt;br /&gt;20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>null</td>
     *   <td>true/false</td>
     *   <td>" "</td>
     *   <td>"Here is one line of" + systemNewLine + "text that is going"
     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>false</td>
     *   <td>" "</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>" "</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"flammable/inflammable"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>"/"</td>
     *   <td>"flammable\ninflammable"</td>
     *  </tr>
     * </table>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @param wrapOn  regular expression matching the characters the text may be broken on;
     *  if a blank string is provided a space character will be used
     * @return a line with newlines inserted, <code>null</code> if null input
     * @throws java.util.regex.PatternSyntaxException if <code>wrapOn</code> is not a valid
     *  regular expression
     */
    public static String wrap(final String str,
                              int wrapLength,
                              String newLineStr,
                              final boolean wrapLongWords,
                              String wrapOn) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = System.lineSeparator();
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        if (StringUtils.isBlank(wrapOn)) {
            wrapOn = " ";
        }
        final Pattern patternToWrapOn = Pattern.compile(wrapOn);
        final int inputLineLength = str.length();
        int offset = 0;
        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);

        while (offset < inputLineLength) {
            int spaceToWrapAt = -1;
            // The window is one character wider than wrapLength so that a break character
            // sitting exactly at the wrap column is seen. The sum is computed as a long
            // because offset + wrapLength + 1 overflows int for very large wrapLength values.
            final int windowEnd = (int) Math.min((long) offset + wrapLength + 1L, inputLineLength);
            Matcher matcher = patternToWrapOn.matcher(str.substring(offset, windowEnd));
            if (findNonEmpty(matcher)) {
                if (matcher.start() == 0) {
                    // break characters at the start of a line are dropped
                    offset += matcher.end();
                    continue;
                }
                spaceToWrapAt = matcher.start() + offset;
            }

            // only last line without leading spaces is left
            if (inputLineLength - offset <= wrapLength) {
                break;
            }

            // keep the LAST break point inside the window so the line is as long as possible
            while (findNonEmpty(matcher)) {
                spaceToWrapAt = matcher.start() + offset;
            }

            if (spaceToWrapAt >= offset) {
                // normal case
                wrappedLine.append(str, offset, spaceToWrapAt);
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;

            } else if (wrapLongWords) {
                // really long word or URL: wrap it one line at a time
                wrappedLine.append(str, offset, wrapLength + offset);
                wrappedLine.append(newLineStr);
                offset += wrapLength;

            } else {
                // do not wrap really long word, just extend beyond limit
                matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
                if (findNonEmpty(matcher)) {
                    spaceToWrapAt = matcher.start() + offset + wrapLength;
                }

                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str, offset, spaceToWrapAt);
                    wrappedLine.append(newLineStr);
                    offset = spaceToWrapAt + 1;
                } else {
                    wrappedLine.append(str, offset, inputLineLength);
                    offset = inputLineLength;
                }
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str, offset, inputLineLength);

        return wrappedLine.toString();
    }

    /**
     * Advances the matcher to its next match that consumes at least one character.
     *
     * <p>Empty matches are skipped because they cannot serve as a break point: an empty
     * match at the start of the search window would leave the wrap offset unchanged and
     * the wrapping loop would never terminate.</p>
     *
     * @param matcher  the matcher to advance
     * @return true if a non-empty match was found; the matcher is then positioned on it
     */
    private static boolean findNonEmpty(final Matcher matcher) {
        while (matcher.find()) {
            if (matcher.end() > matcher.start()) {
                return true;
            }
        }
        return false;
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first character of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(final String str) {
        return capitalize(str, null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first character of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String, char[])
     */
    public static String capitalize(final String str, final char... delimiters) {
        final int delimLen = delimiters == null ? -1 : delimiters.length;
        if (StringUtils.isEmpty(str) || delimLen == 0) {
            return str;
        }
        final int strLen = str.length();
        // one slot per char is always enough: a code point occupies at least one char
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;

        boolean capitalizeNext = true;
        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);
            // advance by the width of the code point that was READ, not of its converted form
            index += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                capitalizeNext = true;
                newCodePoints[outOffset++] = codePoint;
            } else if (capitalizeNext) {
                newCodePoints[outOffset++] = Character.toTitleCase(codePoint);
                capitalizeNext = false;
            } else {
                newCodePoints[outOffset++] = codePoint;
            }
        }
        return new String(newCodePoints, 0, outOffset);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.  </p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(final String str) {
        return capitalizeFully(str, null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters. </p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case. The String is first lower-cased with {@link String#toLowerCase()},
     * which uses the default locale.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)            = null
     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, new char[0])     = *
     * WordUtils.capitalizeFully("i am FINE", null)  = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(String str, final char... delimiters) {
        final int delimLen = delimiters == null ? -1 : delimiters.length;
        if (StringUtils.isEmpty(str) || delimLen == 0) {
            return str;
        }
        str = str.toLowerCase();
        return capitalize(str, delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first character of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(final String str) {
        return uncapitalize(str, null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first character of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. </p>
     *
     * <p>If the delimiters array is null, whitespace as defined by
     * {@link Character#isWhitespace(char)} is used.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)            = null
     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, new char[0])     = *
     * WordUtils.uncapitalize("I Am FINE", null)  = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(final String str, final char... delimiters) {
        final int delimLen = delimiters == null ? -1 : delimiters.length;
        if (StringUtils.isEmpty(str) || delimLen == 0) {
            return str;
        }
        final int strLen = str.length();
        // one slot per char is always enough: a code point occupies at least one char
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;

        boolean uncapitalizeNext = true;
        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);
            // advance by the width of the code point that was READ, not of its converted form
            index += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                uncapitalizeNext = true;
                newCodePoints[outOffset++] = codePoint;
            } else if (uncapitalizeNext) {
                newCodePoints[outOffset++] = Character.toLowerCase(codePoint);
                uncapitalizeNext = false;
            } else {
                newCodePoints[outOffset++] = codePoint;
            }
        }
        return new String(newCodePoints, 0, outOffset);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(final String str) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        final int strLen = str.length();
        // one slot per char is always enough: a code point occupies at least one char
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;
        // true while the previous code point was whitespace (or at the start of the String)
        boolean whitespace = true;
        for (int index = 0; index < strLen;) {
            final int oldCodepoint = str.codePointAt(index);
            // advance by the width of the code point that was READ, not of its converted form
            index += Character.charCount(oldCodepoint);

            final int newCodePoint;
            if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
                newCodePoint = Character.toLowerCase(oldCodepoint);
                whitespace = false;
            } else if (Character.isLowerCase(oldCodepoint)) {
                if (whitespace) {
                    newCodePoint = Character.toTitleCase(oldCodepoint);
                    whitespace = false;
                } else {
                    newCodePoint = Character.toUpperCase(oldCodepoint);
                }
            } else {
                whitespace = Character.isWhitespace(oldCodepoint);
                newCodePoint = oldCodepoint;
            }
            newCodePoints[outOffset++] = newCodePoint;
        }
        return new String(newCodePoints, 0, outOffset);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial characters from each word in the String.</p>
     *
     * <p>All first characters after whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     */
    public static String initials(final String str) {
        return initials(str, null);
    }

    /**
     * <p>Extracts the initial characters from each word in the String.</p>
     *
     * <p>All first characters after the defined delimiters are returned as a new string.
     * Their case is not changed. The String is read code point by code point, so an
     * initial outside the Basic Multilingual Plane is returned as a complete surrogate
     * pair.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial characters, <code>null</code> if null String input
     * @see #initials(String)
     */
    public static String initials(final String str, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        final int strLen = str.length();
        // n initials need at least n - 1 separating delimiters, i.e. 2n - 1 chars,
        // so strLen / 2 + 1 slots are always enough
        final int[] initialCodePoints = new int[strLen / 2 + 1];
        int count = 0;
        boolean lastWasGap = true;
        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                initialCodePoints[count++] = codePoint;
                lastWasGap = false;
            }
            // any other code point is inside a word and is ignored
        }
        return new String(initialCodePoints, 0, count);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Checks if the String contains all words in the given array.</p>
     *
     * <p>
     * A {@code null} String will return {@code false}. A {@code null}, zero
     * length search array or if one element of array is null will return {@code false}.
     * </p>
     *
     * <p>
     * Each search word is matched literally (regular expression metacharacters in it have
     * no special meaning) and must be delimited by regular expression word boundaries
     * ({@code \b}) in the checked text.
     * </p>
     *
     * <pre>
     * WordUtils.containsAllWords(null, *)            = false
     * WordUtils.containsAllWords("", *)              = false
     * WordUtils.containsAllWords(*, null)            = false
     * WordUtils.containsAllWords(*, [])              = false
     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
     * WordUtils.containsAllWords("abc def", "def", "abc") = true
     * </pre>
     *
     * @param word  The CharSequence to check, may be null
     * @param words  The array of String words to search for, may be null
     * @return {@code true} if all search words are found, {@code false} otherwise
     */
    public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
        if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
            return false;
        }
        for (final CharSequence w : words) {
            if (StringUtils.isBlank(w)) {
                return false;
            }
            // Quote the word so that characters such as '.', '(' or '*' are taken literally
            // instead of being interpreted as (possibly invalid) regex syntax. find() is used
            // rather than ".*word.*" with matches() because '.' does not match line
            // terminators, which made the check fail on multi-line text.
            final Pattern p = Pattern.compile("\\b" + Pattern.quote(w.toString()) + "\\b");
            if (!p.matcher(word).find()) {
                return false;
            }
        }
        return true;
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param ch  the character to check
     * @param delimiters  the delimiters; if null, {@link Character#isWhitespace(char)}
     *  decides
     * @return true if it is a delimiter
     */
    public static boolean isDelimiter(final char ch, final char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(ch);
        }
        for (final char delimiter : delimiters) {
            if (ch == delimiter) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Is the codePoint a delimiter.
     *
     * @param codePoint  the codePoint to check
     * @param delimiters  the delimiters; if null, {@link Character#isWhitespace(int)}
     *  decides
     * @return true if it is a delimiter
     */
    public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(codePoint);
        }
        for (int index = 0; index < delimiters.length; index++) {
            // codePointAt combines a high surrogate with a following low surrogate, so a
            // surrogate pair stored in the array is compared as one supplementary code point
            final int delimiterCodePoint = Character.codePointAt(delimiters, index);
            if (delimiterCodePoint == codePoint) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Abbreviates the words nicely.
     *
     * <p>This method searches for the first space after the lower limit and abbreviates
     * the String there. It will also append any String passed as a parameter
     * to the end of the String. The upper limit can be specified to forcibly
     * abbreviate a String.</p>
     *
     * <pre>
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null)     = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null)    = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null)    = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, "")       = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, "")      = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, "")      = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...")   = "Now ..."
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...")  = "Now is the ..."
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...")  = "Now is the time for all ..."
     * WordUtils.abbreviate("Now is the time for all good men", 0, -1, "")       = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, -1, "")      = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, -1, "")      = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 50, -1, "")      = "Now is the time for all good men"
     * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "")    = "Now is the time for all good men"
     * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null)    = IllegalArgumentException
     * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null)     = IllegalArgumentException
     * </pre>
     *
     * @param str         the string to be abbreviated. If null is passed, null is returned.
     *                    If the empty String is passed, the empty string is returned.
     * @param lower       the lower limit; a value greater than the length of the string
     *                    is treated as the length of the string.
     * @param upper       the upper limit; specify -1 if no limit is desired.
     *                    A value greater than the length of the string is treated as the
     *                    length of the string.
     * @param appendToEnd String to be appended to the end of the abbreviated string.
     *                    This is appended ONLY if the string was indeed abbreviated.
     *                    The append does not count towards the lower or upper limits.
     *                    <code>null</code> is treated as the empty string.
     * @return the abbreviated String.
     * @throws IllegalArgumentException if <code>upper</code> is less than -1, or if
     *                    <code>upper</code> is not -1 and is less than <code>lower</code>
     */
    public static String abbreviate(String str, int lower, int upper, String appendToEnd) {
        Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
        Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");

        if (StringUtils.isEmpty(str)) {
            return str;
        }

        // if the lower value is greater than the length of the string,
        // set to the length of the string
        if (lower > str.length()) {
            lower = str.length();
        }

        // if the upper value is -1 (i.e. no limit) or is greater
        // than the length of the string, set to the length of the string
        if (upper == -1 || upper > str.length()) {
            upper = str.length();
        }

        final StringBuilder result = new StringBuilder();
        final int index = StringUtils.indexOf(str, " ", lower);
        if (index == -1) {
            // no space at or after the lower limit: cut at the upper limit
            result.append(str, 0, upper);
            // only if abbreviation has occurred do we append the appendToEnd value
            if (upper != str.length()) {
                result.append(StringUtils.defaultString(appendToEnd));
            }
        } else if (index > upper) {
            // the next space lies beyond the upper limit: cut forcibly at the upper limit
            result.append(str, 0, upper);
            result.append(StringUtils.defaultString(appendToEnd));
        } else {
            // cut at the first space found at or after the lower limit
            result.append(str, 0, index);
            result.append(StringUtils.defaultString(appendToEnd));
        }

        return result.toString();
    }
}