001 /*
002 Copyright (c) 1996-2012, Damon Hart-Davis
003 All rights reserved.
004
005 Redistribution and use in source and binary forms, with or without
006 modification, are permitted provided that the following conditions are
007 met:
008
009 * Redistributions of source code must retain the above copyright
010 notice, this list of conditions and the following disclaimer.
011
012 * Redistributions in binary form must reproduce the above copyright
013 notice, this list of conditions and the following disclaimer in the
014 documentation and/or other materials provided with the
015 distribution.
016
017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 */
029
030 package org.hd.d.pg2k.svrCore;
031
032 import java.io.DataOutputStream;
033 import java.io.IOException;
034 import java.io.InvalidObjectException;
035 import java.io.ObjectInputStream;
036 import java.io.OutputStream;
037 import java.io.Serializable;
038 import java.security.DigestOutputStream;
039 import java.security.MessageDigest;
040 import java.util.Arrays;
041 import java.util.Collections;
042 import java.util.Comparator;
043 import java.util.HashSet;
044 import java.util.Hashtable;
045 import java.util.ResourceBundle;
046 import java.util.Set;
047 import java.util.SortedSet;
048 import java.util.StringTokenizer;
049 import java.util.TreeSet;
050
051 import ORG.hd.d.IsDebug;
052
053
054 /**Basic utilities for retrieving and handling exhibit attribute/discardable words.
055 */
056 public final class ExhibitAttrUtils
057 {
/**Private no-op constructor: this class only hosts static members and is never instantiated. */
private ExhibitAttrUtils()
{
    // Deliberately empty.
}
060
061 /**Name of attribute words list in global properties. */
062 public static final String ATTR_WORD_LIST_NAME = "org.hd.d.pg.nameDiscardableWords";
063
064 /**Gets unsorted list of attribute words from global properties; never null.
065 * This returns a list of valid unduplicated attribute words,
066 * with any problems found being reported on System.err.
067 * <p>
068 * This may return an empty list but not a null.
069 */
070 private static String[] _getAttributeWords()
071 {
072 try
073 {
074 final ResourceBundle gp = ResourceBundle.getBundle(CoreConsts.GLOBAL_PROPS_NAME);
075 final String words = gp.getString(ATTR_WORD_LIST_NAME);
076
077 final StringTokenizer wordsList = new StringTokenizer(words);
078 final HashSet<String> theseWords = new HashSet<String>(wordsList.countTokens() * 2 + 1);
079 while(wordsList.hasMoreTokens())
080 {
081 final String w = wordsList.nextToken();
082 if(!ExhibitName.validAttributeWord(w))
083 {
084 System.err.println("WARNING: ExhibitAttrUtils: illegal attribute word \"" + w + "\" in properties " + CoreConsts.GLOBAL_PROPS_NAME + ", property name " + ATTR_WORD_LIST_NAME + ".");
085 continue; // Do not include the word.
086 }
087 // Eliminate duplicates.
088 if(!theseWords.contains(w))
089 { theseWords.add(MemoryTools.intern(w)); }
090 }
091
092 // Extract and return the (unsorted) set/list of attribute words.
093 final String result[] = new String[theseWords.size()];
094 theseWords.toArray(result);
095 if(IsDebug.isDebug) { System.out.println("[ExhibitAttrUtils: attribute words loaded: " + result.length + ".]"); }
096 return(result);
097 }
098 catch(final Exception e)
099 {
100 System.err.println("ERROR: ExhibitAttrUtils: did not find attribute words in properties " + CoreConsts.GLOBAL_PROPS_NAME + ", property name " + ATTR_WORD_LIST_NAME + ".");
101 e.printStackTrace();
102 return(new String[0]);
103 }
104 }
105
106 /**This class contains exhibit attribute words.
107 * This data is derived from global config data.
108 */
109 public static final class ExhibitAttrWords implements Serializable
110 {
/**Construct an immutable holder for a set of attribute words.
 * The words are copied into a private sorted, duplicate-free form,
 * the long hash is computed over them,
 * the comparators are set up,
 * and finally the whole state is checked with validateObject().
 *
 * @param attributeWords  set of attribute words for exhibit names;
 *     need not be sorted,
 *     may be null or zero length (null is treated as empty),
 *     must not contain null elements,
 *     and should not contain any word rejected by
 *     ExhibitName.validAttributeWord()
 * @throws IllegalArgumentException  if validateObject() rejects the
 *     resulting state (eg an invalid word was supplied);
 *     the underlying InvalidObjectException is attached as the cause
 */
private ExhibitAttrWords(final String attributeWords[])
{
    // The documented contract allows null, so treat it as "no words"
    // rather than letting Arrays.asList() throw a NullPointerException.
    final String[] suppliedWords = (null == attributeWords) ? new String[0] : attributeWords;

    // Make private copy of array and ensure that words are sorted.
    // We will check that entries are valid in validateObject();
    // routing through a TreeSet removes duplicates.
    attrWordsSortedSet = Collections.unmodifiableSortedSet(
        new TreeSet<String>(Arrays.asList(suppliedWords)));
    attrWords = new String[attrWordsSortedSet.size()];
    attrWordsSortedSet.toArray(attrWords);

    // Now compute our hash over the entries.
    longHash = computeHash();

    // Set up our comparators and stubs.
    // The stubs are the public, serialisable face of the
    // transient internal comparators.
    _setComparators(attrWordsSortedSet);
    SUBJECT_ORDER =
        (new IndirectComparatorStub(longHash){ // Trivial stub.
            @Override
            protected final Comparator<Name.ExhibitFull> getComp() { return(_SUBJECT_ORDER); }
            private static final long serialVersionUID = -7219799635852371814L;
            });
    SMART_ORDER =
        (new IndirectComparatorStub(longHash){ // Trivial stub.
            @Override
            protected final Comparator<Name.ExhibitFull> getComp() { return(_SMART_ORDER); }
            private static final long serialVersionUID = -6487931467358267993L;
            });

    // Verify object state.
    try { validateObject(); }
    catch(final InvalidObjectException e)
    {
        // Keep the original exception as the cause to aid diagnosis.
        throw new IllegalArgumentException(e.getMessage(), e);
    }
}
153
/**The hash of all the data held; guaranteed non-negative.
 * Depends on all the information held in this object.
 */
157 public final long longHash;
158
/**The sorted and duplicate-free list of attribute words.
 * All of these should meet the rules for ExhibitName.validAttributeWord().
 * <p>
 * We are careful never to share a reference to this outside
 * the class, eg upon deserialisation.
 * <p>
 * This is the primary store of attribute words information.
 */
167 private /* final */ String attrWords[];
168
169 /**The immutable sorted set of attribute words.
170 * A duplicate of the info in attrWords[].
171 */
172 private transient SortedSet<String> attrWordsSortedSet;
173
174 /**The sorted and duplicate-free non-null list of attribute words.
175 * This array contains no nulls nor duplicates
176 * and all entries meet the rules for
177 * ExhibitNames.validAttributeWord().
178 * <p>
179 * Creating the return value may be expensive,
180 * requiring extensive copying.
181 *
182 * @return private copy of attribute-words list
183 */
184 public String[] getAttrWords()
185 {
186 return((attrWords.clone()));
187 }
188
189 /**The set of attribute words as a view in an immutable SortedSet.
190 * This set contains no nulls
191 * and all entries meet the rules for
192 * ExhibitNames.validAttributeWord().
193 * <p>
194 * Preparing this return value should take a constant, small, time.
195 *
196 * @return immutable view of attribute-words sorted set
197 */
198 public SortedSet<String> getAttrWordsSortedSet()
199 {
200 return(attrWordsSortedSet);
201 }
202
203 /**Device to allow serialisation of trivial stubs and do proper quality test.
204 * This allows serialisation to take place,
205 * and also allows equals() to be implemented by comparing the
206 * implementation class and the underlying longHash.
207 */
208 private static abstract class IndirectComparatorStub
209 implements Serializable, Comparator<Name.ExhibitFull>
210 {
211 /**Construct an instance with the hash embedded. */
212 IndirectComparatorStub(final long longHash)
213 { hash = (int) longHash; }
214
215 /**Local version of main hash. */
216 private final int hash;
217
218 /**Get the comparator that we indirectly call. */
219 protected abstract Comparator<Name.ExhibitFull> getComp();
220
221 /**Give this a fixed ID. */
222 private static final long serialVersionUID = 0x5e0f51a9b80d99ddL;
223
224 /**Apply the underlying Comparator */
225 public final int compare(final Name.ExhibitFull ex1, final Name.ExhibitFull ex2)
226 { return(getComp().compare(ex1, ex2)); }
227
228 /**Compute a condensed version of the main hash. */
229 @Override
230 public int hashCode()
231 { return(hash); }
232
233 /**Is comparator equal (ie has some data changed)? */
234 @Override
235 public boolean equals(final Object obj)
236 {
237 if(obj == this) { return(true); } // Trival-but-common case.
238
239 // Must be a stub comparator...
240 if(!(obj instanceof IndirectComparatorStub)) { return(false); }
241 final IndirectComparatorStub other = (IndirectComparatorStub) obj;
242
243 // Must be same final (most-derived) anonymous Comparator class.
244 if(getComp().getClass() != other.getComp().getClass()) { return(false); }
245
246 // Must have the same underlying hash
247 // and therefore the same attribute words.
248 if(hashCode() != obj.hashCode()) { return(false); }
249
250 return(true);
251 }
252 }
253
254 /**Internal copy of SUBJECT_ORDER; not serialised. */
255 private transient Comparator<Name.ExhibitFull> _SUBJECT_ORDER;
256
257 /**Internal copy of SMART_ORDER; not serialised. */
258 private transient Comparator<Name.ExhibitFull> _SMART_ORDER;
259
260 /**Class to implement our "subject" order Comparator.
261 * TODO: cache of intern()ed main words component (or sets of items with identical main words) to speed up searching in large blocks with same main words
262 */
263 private static final class SubjOrderComp implements Comparator<Name.ExhibitFull>
264 {
265 SubjOrderComp(final Set<String> attrWordsSortedSet)
266 { this.attrWordsSortedSet = attrWordsSortedSet; }
267
268 /**The set of valid attribute/discardable words; not null. */
269 private final Set<String> attrWordsSortedSet;
270
271 public final int compare(final Name.ExhibitFull s1, final Name.ExhibitFull s2)
272 {
273 if(s1 == s2) { return(0); }
274
275 // Comparison by main-words (stem) component, case-insensitive.
276 final int mainResult = TextUtils.CASE_INSENSITIVE_ORDER.compare(
277 s1.getShortName().getMainWordsComponent(attrWordsSortedSet),
278 s2.getShortName().getMainWordsComponent(attrWordsSortedSet));
279 if(mainResult != 0) { return(mainResult); }
280
281 // Comparison by author (initials) component.
282 final int authResult = TextUtils.compare(
283 ExhibitName.getAuthorComponent(s1),
284 ExhibitName.getAuthorComponent(s2));
285 if(authResult != 0) { return(authResult); }
286
287 // Comparison by number-in-series component.
288 final int nisResult =
289 ExhibitName.getNumberInSeriesComponent(s1) -
290 ExhibitName.getNumberInSeriesComponent(s2);
291 if(nisResult != 0) { return(nisResult); }
292
293 return(0); // May differ only in attribute words.
294 }
295 };
296
297 /**Class to implement our "smart" order Comparator. */
298 private static final class SmartOrderComp implements Comparator<Name.ExhibitFull>
299 {
300 SmartOrderComp(final SubjOrderComp subjOC, final Set<String> attrWordsSortedSet)
301 {
302 this.subjOC = subjOC;
303 this.attrWordsSortedSet = attrWordsSortedSet;
304 }
305 private final SubjOrderComp subjOC;
306 private final Set<String> attrWordsSortedSet;
307
308 public final int compare(final Name.ExhibitFull s1, final Name.ExhibitFull s2)
309 {
310 // Comparison/sort on exhibit subject.
311 final int subjectResult = subjOC.compare(s1, s2);
312 if(subjectResult != 0) { return(subjectResult); }
313
314 // Comparison by attribute-words (stem) portion.
315 CharSequence a1 = ExhibitName.getAttributeWordsComponent(s1, attrWordsSortedSet);
316 if(a1 == null) { a1 = ""; }
317 CharSequence a2 = ExhibitName.getAttributeWordsComponent(s2, attrWordsSortedSet);
318 if(a2 == null) { a2 = ""; }
319 final int attrResult = TextUtils.compare(a1, a2);
320 if(attrResult != 0) { return(attrResult); }
321
322 // Needs a tie-break to achieve a total order.
323 return(ExhibitName.SIMPLE_SMART_ORDER.compare(s1, s2));
324 }
325 };
326
327 /**Routine to set _SUBJECT_ORDER and _SMART_ORDER.
328 * Called from constructor and readObject().
329 */
330 private void _setComparators(final Set<String> attrWordsSortedSet)
331 {
332 final SubjOrderComp subjO = new SubjOrderComp(attrWordsSortedSet);
333 _SUBJECT_ORDER = subjO;
334 _SMART_ORDER = new SmartOrderComp(subjO, attrWordsSortedSet);
335 }
336
337 /**A Comparator that sorts exhibits by name comparing as equal those that differ only in attribute words.
338 * This sorts, in order by:
339 * <ol>
340 * <li>main stem of the name case-insensitively
341 * <li>author
342 * <li>number-in-series
343 * </ol>
344 */
345 public final Comparator<Name.ExhibitFull> SUBJECT_ORDER;
346
347 /**A Comparator that sorts exhibits by name taking account of attribute words.
348 * This sorts, in order by:
349 * <ol>
350 * <li>SUBJECT_ORDER to sort into order by exhibit subject
351 * <li>attribute words to sort within an exhibit subject
352 * <li>ExhibitName.SIMPLE_SMART_ORDER to break ties
353 * </ol>
354 * <p>
355 * equals() is true iff referring underlying comparator's backing AEP
356 * has an unchanged longHash.
357 */
358 public final Comparator<Name.ExhibitFull> SMART_ORDER;
359
360 /**Compute a hash over all the data in the object; guaranteed non-negative.
361 * Aims to be quick and comprehensive and portable between VMs,
362 * and should not miss even quite subtle changes in the stored data.
363 */
364 private long computeHash()
365 {
366 // Running computed hash value.
367 long result = 0;
368
369 // Start a message digest (SHA1).
370 final MessageDigest md = GenUtils.getStandardDigest();
371
372 try {
373 final DataOutputStream dos = new DataOutputStream(
374 new DigestOutputStream(
375 (new OutputStream(){ // Null output stream...
376 @Override
377 public final void write(final int b) { }
378 @Override
379 public final void write(final byte[] b, final int off, final int len) { }
380 }),
381 md));
382
383 // Put in attr words if any at all.
384 for(int i = attrWords.length; --i >= 0; )
385 {
386 dos.writeUTF(attrWords[i]);
387 }
388
389 dos.flush();
390 }
391 catch(final IOException e) // Should never happen.
392 { throw new Error("unexpected internal error"); }
393
394 // Create the basic hash from the first 8 digest bytes.
395 final byte[] digest = md.digest();
396 long lHash = 0x9fe93d5e4820804bL; // Unique random base value.
397 for(int i = 8; --i >= 0; )
398 { lHash ^= (((long) (digest[i] & 0xff)) << (8*i)); }
399
400 // We guarantee that the hash is non-negative.
401 result ^= ((lHash >>> 1) | 1);
402
403 // Return our computed hash.
404 return(result);
405 }
406
407 /**Returns a hash code value for the object; derived from the longHash.
408 * Guaranteed zero if there are no exhibits, non-zero otherwise.
409 *
410 * @return a hash code value for this object.
411 * @see Object#equals(Object)
412 * @see Hashtable
413 */
414 @Override
415 public int hashCode()
416 {
417 return(((int) (longHash >>> 32)) ^ (int) longHash);
418 }
419
420 /**Indicates whether some other object is "equal to" this one; the underlying data is the same if true.
421 * The hashNotChangedSince value is excluded from the comnparison.
422 *
423 * @param obj the reference object with which to compare.
424 * @return <code>true</code> if this object is the same as the obj
425 * argument; <code>false</code> otherwise.
426 * @see Boolean#hashCode()
427 * @see Hashtable
428 */
429 @Override
430 public synchronized boolean equals(final Object obj)
431 {
432 // Must be of the same type to be equal.
433 if(!(obj instanceof ExhibitAttrWords)) { return(false); }
434 final ExhibitAttrWords eaw = (ExhibitAttrWords) obj;
435
436 // We can quickly reject items that don't have the same hash...
437 if(longHash != eaw.longHash) { return(false); }
438
439 // Check the underlying immutable data.
440 if(!Arrays.equals(attrWords, eaw.attrWords)) { return(false); }
441
442 // OK, seems to actually be equal.
443 return(true);
444 }
445
446
447 /**Validate fields/state.
448 * Called in the constructor and possibly after de-serialising.
449 * <p>
450 * Barf if something bad is found.
451 * (Maybe allow some extra info in debug version.)
452 */
453 public void validateObject()
454 throws InvalidObjectException
455 {
456 // Check that all components are sane and safe.
457 if(longHash < 0)
458 { throw new InvalidObjectException("bad object: longHash < 0"); }
459 // Check that we can correctly recompute the hash.
460 if(longHash != computeHash())
461 { throw new InvalidObjectException("bad object: hash wrong"); }
462 if(attrWords == null)
463 { throw new InvalidObjectException("bad object: attrWords == null"); }
464 for(int i = attrWords.length; --i >= 0; )
465 {
466 final String aW = attrWords[i];
467 if(!ExhibitName.validAttributeWord(aW))
468 { throw new InvalidObjectException("bad object: bad attrWord " + aW); }
469 /*
470 Don't need to check for duplicates or ordering
471 because we process loaded words via a sorted set
472 on construction and deserialisation.
473 if((i > 0) && (aW.compareTo(attrWords[i-1]) <= 0))
474 { throw new InvalidObjectException("bad object: duplicate or out-of-order attrWord " + aW); }
475 */
476 }
477 if(!(SMART_ORDER instanceof IndirectComparatorStub) ||
478 !(SUBJECT_ORDER instanceof IndirectComparatorStub))
479 { throw new InvalidObjectException("bad object: comparator wrong"); }
480 if(!(_SMART_ORDER instanceof Comparator<?>) ||
481 !(_SUBJECT_ORDER instanceof Comparator<?>))
482 { throw new InvalidObjectException("bad object: internal comparator wrong"); }
483 }
484
485 /**Deserialise. */
486 private synchronized void readObject(final ObjectInputStream in)
487 throws IOException, ClassNotFoundException
488 {
489 in.defaultReadObject();
490
491 // Make private copy of attrWords[] and ensure that words are sorted.
492 // We will check that entries are valid in validateObject().
493 attrWordsSortedSet = Collections.unmodifiableSortedSet(
494 new TreeSet<String>(Arrays.asList(attrWords)));
495 attrWords = new String[attrWordsSortedSet.size()];
496 attrWordsSortedSet.toArray(attrWords);
497
498 // Set up our comparators.
499 _setComparators(attrWordsSortedSet);
500
501 validateObject(); // Validate state immediately.
502 }
503
504 /**Our serial version... */
505 private static final long serialVersionUID = 0xf130cee4509ca259L;
506 }
507
508 /**Immutable set of exhibit attribute words, comparators, etc; never null. */
509 private static final ExhibitAttrWords attrWords = new ExhibitAttrWords(_getAttributeWords());
510
511 /**Gets (immutable) set of exhibit attribute words, comparators, etc; never null. */
512 public static final ExhibitAttrWords getAttrWords()
513 { return(attrWords); }
514 }