|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
org.hd.d.pg2k.svrCore.TextUtils
public final class TextUtils
Some simple common text utilities of scope throughout the application.
| Nested Class Summary | |
|---|---|
private static class |
TextUtils.Base64Cache
Private to encode8To6()/decode8To6(); automagically created on first access. |
static interface |
TextUtils.CharSequence7Bit
Extension (marker interface) for a CharSequence that holds only 7-bit character values. |
static interface |
TextUtils.CharSequence8Bit
Extension for a CharSequence that holds only 8-bit character values. |
| Field Summary | |
|---|---|
static java.util.Comparator<java.lang.CharSequence> |
CASE_INSENSITIVE_ORDER
Orders CharSequence objects as if by String.compareToIgnoreCase(); not null. |
static java.util.Comparator<java.lang.CharSequence> |
CASE_SENSITIVE_ORDER
Orders CharSequence objects as if by String.compareTo(); not null. |
private static int |
MIN_NAME_CHARS_FOR_EFFICIENT_TEXT_REPRESENTATION
Minimum-character entity to hold as Name with its high overheads; non-negative. |
private static java.lang.String[] |
sizeSuffixes
Suffixes used by sizeAsText. |
private static java.util.regex.Pattern |
wordBoundaryRegex
Compiled regex to match inter-word gaps in plain (mainly English) text; non-null. |
private static java.lang.String |
XML_LINE_TERM
String to write as line terminator when converting to XML. |
| Constructor Summary | |
|---|---|
private |
TextUtils()
Prevent construction of an instance. |
| Method Summary | ||
|---|---|---|
static int |
compare(java.lang.CharSequence cs1,
java.lang.CharSequence cs2)
Compares two (non-null) CharSequences for lexical order. |
|
static boolean |
contentEquals(java.lang.CharSequence cs1,
java.lang.CharSequence cs2)
Checks that two (non-null) CharSequences represent the same sequence of chars. |
|
static boolean |
contentEquals(TextUtils.CharSequence8Bit cs1,
TextUtils.CharSequence8Bit cs2)
Checks that two (non-null) 8-bit CharSequences represent the same sequence of chars/bytes. |
|
static boolean |
contentEqualsIgnoreCase(java.lang.CharSequence cs1,
java.lang.CharSequence cs2)
Checks that two (non-null) CharSequences represent the same sequence of chars, ignoring case. |
|
static boolean |
contentEqualsOrBothNull(java.lang.CharSequence cs1,
java.lang.CharSequence cs2)
Checks that two CharSequences are both null or represent the same (non-null) sequence of chars. |
|
static <T extends java.lang.CharSequence> java.util.SortedSet<T> |
createCharSequenceSortedSet(java.util.Collection<T> csc)
Create and populate a SortedSet of CharSequence in natural/total case-sensitive order that will work with any mix of immutable CharSequence keys; never null. |
|
static <T extends java.lang.CharSequence> java.util.SortedSet<T> |
createCharSequenceSortedSet(java.util.Collection<T> csc,
java.util.Comparator<java.lang.CharSequence> comparator)
Create and populate a SortedSet of CharSequence in specified order that will work with any mix of immutable CharSequence keys; never null. |
|
static byte[] |
decode8To6(java.lang.String base64Text)
Decode to a byte array (8 bit) from ASCII Base-64 (6 bit); never null. |
|
static java.lang.String |
encode8To6(byte[] data8Bit)
Encode a byte array (8 bit) in ASCII Base-64 (6 bit); never null. |
|
static boolean |
endsWith(java.lang.CharSequence mainText,
java.lang.CharSequence putativeSuffix)
Returns true if the first sequence ends with the second (neither null), else false. |
|
static java.lang.String |
escapeHTMLMetaChars(java.lang.String in)
Rewrite HTML so that it displays as "raw" text and is safe to use in attribute values. |
|
static int |
hashCode(java.lang.CharSequence cs)
Return a hash code the same as or similar to that of a String containing the same characters. |
|
static java.lang.String |
hashCodeHexString(java.lang.CharSequence cs)
Return an ASCII printable hex hash code; never null nor empty. |
|
static void |
importCopy(org.w3c.dom.Node dest,
org.w3c.dom.Node src)
Recursively copy second Node and contents into first node. |
|
static int |
indexOf(java.lang.CharSequence cs,
char c)
First index of specified character in given (non-null) CharSequence as for String. |
|
static int |
indexOf(java.lang.CharSequence cs,
char c,
int fromIndex)
First index of specified character in given (non-null) CharSequence from/after specified index as for String. |
|
static boolean |
isASCII7(java.lang.String s)
Returns true if given String is 7-bit clean, ie is pure ASCII, or is null. |
|
static int |
lastIndexOf(java.lang.CharSequence cs,
char c)
Last index of specified character in given (non-null) CharSequence as for String. |
|
static int |
lastIndexOf(java.lang.CharSequence cs,
char c,
int fromIndex)
Last index of specified character in given (non-null) CharSequence from/before specified start index as for String. |
|
static java.lang.CharSequence |
makeEfficientTextRepresentation(java.lang.CharSequence value,
java.util.concurrent.atomic.AtomicReference<Name> prevRef)
Make an efficient representation for possibly non-unique text to be held in memory long-term. |
|
static int |
quickWordCount(java.lang.CharSequence text)
Quickly attempt to count the words in plain text; non-negative. |
|
static boolean |
regionMatches(java.lang.CharSequence cs1,
int start1,
java.lang.CharSequence cs2,
int start2,
int len)
Checks that the specified region of two (non-null) CharSequences matches exactly (as for String). |
|
static java.lang.String |
sanitiseForXML(java.lang.String input,
int maxLen,
boolean allowEntities)
Sanitise text for XHTML/HTML/WML/XML use. |
|
static java.lang.String |
sizeAsText(long size,
boolean abbrev)
Renders size in bytes as text, abbreviated if requested. |
|
static boolean |
startsWith(java.lang.CharSequence mainText,
java.lang.CharSequence putativePrefix)
Returns true if the first sequence starts with the second (neither null), else false. |
|
static java.lang.String |
toString(byte[] ascii)
Efficient conversion from 7-bit or 8-bit text (one char per byte) to String; never null. |
|
static java.lang.String |
toXML(org.w3c.dom.Node node,
boolean toXHTML,
boolean terse)
Write DOM tree as XML/XHTML String; never "" nor null. |
|
static void |
toXML(java.lang.StringBuilder result,
org.w3c.dom.Node node,
boolean toXHTML,
boolean terse)
Write DOM tree as XML/XHTML String; appends to supplied StringBuilder. |
|
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
private static final java.lang.String[] sizeSuffixes
private static final java.lang.String XML_LINE_TERM
public static final java.util.Comparator<java.lang.CharSequence> CASE_SENSITIVE_ORDER
public static final java.util.Comparator<java.lang.CharSequence> CASE_INSENSITIVE_ORDER
private static final int MIN_NAME_CHARS_FOR_EFFICIENT_TEXT_REPRESENTATION
Empirically determined by distribution of sizes of keys and values!
private static final java.util.regex.Pattern wordBoundaryRegex
| Constructor Detail |
|---|
private TextUtils()
| Method Detail |
|---|
public static final java.lang.String sizeAsText(long size,
boolean abbrev)
public static boolean isASCII7(java.lang.String s)
public static final void importCopy(org.w3c.dom.Node dest,
org.w3c.dom.Node src)
public static final java.lang.String toXML(org.w3c.dom.Node node,
boolean toXHTML,
boolean terse)
When writing XHTML we make text nodes XML/HTML-safe, though allow embedded entities.
node - DOM tree root; never null
toXHTML - if true write with formatting suitable to include directly in HTML/XHTML for human consumption
(eg using dl/ul/ol nested lists to produce formatted text representing the structure)
rather than a pure XML representation of the Node tree itself
terse - if true write as compactly as possible, else make more human-readable if possible
public static final void toXML(java.lang.StringBuilder result,
org.w3c.dom.Node node,
boolean toXHTML,
boolean terse)
When writing XHTML we make text nodes XML/HTML-safe, though allow embedded entities.
node - DOM tree root; never null
toXHTML - if true write with formatting suitable to include directly in HTML/XHTML for human consumption
(eg using dl/ul/ol nested lists to produce formatted text representing the structure)
rather than a pure XML representation of the Node tree itself
terse - if true write as compactly as possible, else make more human-readable if possible
public static java.lang.String escapeHTMLMetaChars(java.lang.String in)
If the input text does not contain these meta-characters then it is returned unchanged.
Additionally, all characters < 32 (ASCII control characters) are converted to spaces.
If null is passed in, then this returns null, to simplify its use in some cases where a null might be present.
in - String possibly containing HTML/XML meta-characters
public static byte[] decode8To6(java.lang.String base64Text)
base64Text - data in base-64, eg as encoded by encode8To6(); never null
public static java.lang.String encode8To6(byte[] data8Bit)
data8Bit - binary input data; never null
public static java.lang.String sanitiseForXML(java.lang.String input,
int maxLen,
boolean allowEntities)
Our simple definition of a syntactically-correct entity is that it may contain any selection of ASCII digits and letters and optionally a leading hash (`#'), but certainly no whitespace. We may enforce a length limit.
If the input string is OK it is returned untouched.
TODO: Needs version with extra argument to allow entity sequences (of the form '&'xxx;) to be treated as single characters and to delete any '<'...'>' sequences, ie to do a more sophisticated job of sanitising XML/HTML that has some simple markup (primarily entity codes needed for i18n). TODO: jUnit tests
maxLen - if positive the output is limited to at most
this number of characters;
if >= 3 truncated text is marked with a trailing ``...''
in the last three positions
allowEntities - if true, treats HTML/XML entity sequences
as if single characters; the entities are vaguely tested for
correct syntax and may not be allowed if invalid
public static final int indexOf(java.lang.CharSequence cs,
char c)
public static final int indexOf(java.lang.CharSequence cs,
char c,
int fromIndex)
public static final int lastIndexOf(java.lang.CharSequence cs,
char c)
public static final int lastIndexOf(java.lang.CharSequence cs,
char c,
int fromIndex)
public static boolean regionMatches(java.lang.CharSequence cs1,
int start1,
java.lang.CharSequence cs2,
int start2,
int len)
public static boolean contentEqualsOrBothNull(java.lang.CharSequence cs1,
java.lang.CharSequence cs2)
public static boolean contentEquals(TextUtils.CharSequence8Bit cs1,
TextUtils.CharSequence8Bit cs2)
public static boolean contentEquals(java.lang.CharSequence cs1,
java.lang.CharSequence cs2)
public static boolean contentEqualsIgnoreCase(java.lang.CharSequence cs1,
java.lang.CharSequence cs2)
public static int compare(java.lang.CharSequence cs1,
java.lang.CharSequence cs2)
public static boolean startsWith(java.lang.CharSequence mainText,
java.lang.CharSequence putativePrefix)
public static boolean endsWith(java.lang.CharSequence mainText,
java.lang.CharSequence putativeSuffix)
public static <T extends java.lang.CharSequence> java.util.SortedSet<T> createCharSequenceSortedSet(java.util.Collection<T> csc)
The result is not thread-safe and is mutable.
csc - initial collection to populate from; can be null to avoid initial population
public static <T extends java.lang.CharSequence> java.util.SortedSet<T> createCharSequenceSortedSet(java.util.Collection<T> csc,
java.util.Comparator<java.lang.CharSequence> comparator)
The result is not thread-safe and is mutable.
csc - initial collection to populate from; can be null to avoid initial population
comparator - used to order the items; never null
public static java.lang.CharSequence makeEfficientTextRepresentation(java.lang.CharSequence value,
java.util.concurrent.atomic.AtomicReference<Name> prevRef)
A null input results in a null.
A non-8-bit (or empty) text results in a String result.
A short (8-bit) text will result in a non-intern()ed CS8Bit result.
Else an (implicitly intern()ed) Name is returned.
This aims to avoid intern() overhead for small values.
value - the value to optimise
prevRef - if non-null, used to convey Name values from one call to the next
to help provide a more-compact representation by sharing prefixes and suffixes
public static java.lang.String toString(byte[] ascii)
ascii - text; never null but may be empty
public static int hashCode(java.lang.CharSequence cs)
public static java.lang.String hashCodeHexString(java.lang.CharSequence cs)
public static int quickWordCount(java.lang.CharSequence text)
|
DHD Multimedia Gallery V1.60.69 | ||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||