001    /*
002    Copyright (c) 1996-2012, Damon Hart-Davis
003    All rights reserved.
004    
005    Redistribution and use in source and binary forms, with or without
006    modification, are permitted provided that the following conditions are
007    met:
008    
009      * Redistributions of source code must retain the above copyright
010        notice, this list of conditions and the following disclaimer.
011    
012      * Redistributions in binary form must reproduce the above copyright
013        notice, this list of conditions and the following disclaimer in the
014        documentation and/or other materials provided with the
015        distribution.
016    
017    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
018    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
019    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
020    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
021    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
022    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
023    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
024    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
025    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
026    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
027    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028    */
029    
030    package org.hd.d.pg2k.svrCore;
031    
032    import java.io.DataOutputStream;
033    import java.io.IOException;
034    import java.io.InvalidObjectException;
035    import java.io.ObjectInputStream;
036    import java.io.OutputStream;
037    import java.io.Serializable;
038    import java.security.DigestOutputStream;
039    import java.security.MessageDigest;
040    import java.util.Arrays;
041    import java.util.Collections;
042    import java.util.Comparator;
043    import java.util.HashSet;
044    import java.util.Hashtable;
045    import java.util.ResourceBundle;
046    import java.util.Set;
047    import java.util.SortedSet;
048    import java.util.StringTokenizer;
049    import java.util.TreeSet;
050    
051    import ORG.hd.d.IsDebug;
052    
053    
054    /**Basic utilities for retrieving and handling exhibit attribute/discardable words.
055     */
056    public final class ExhibitAttrUtils
057        {
058        /**Prevent instantiation. */
059        private ExhibitAttrUtils() { }
060    
061        /**Name of attribute words list in global properties. */
062        public static final String ATTR_WORD_LIST_NAME = "org.hd.d.pg.nameDiscardableWords";
063    
064        /**Gets unsorted list of attribute words from global properties; never null.
065         * This returns a list of valid unduplicated attribute words,
066         * with any problems found being reported on System.err.
067         * <p>
068         * This may return an empty list but not a null.
069         */
070        private static String[] _getAttributeWords()
071            {
072            try
073                {
074                final ResourceBundle gp = ResourceBundle.getBundle(CoreConsts.GLOBAL_PROPS_NAME);
075                final String words = gp.getString(ATTR_WORD_LIST_NAME);
076    
077                final StringTokenizer wordsList = new StringTokenizer(words);
078                final HashSet<String> theseWords = new HashSet<String>(wordsList.countTokens() * 2 + 1);
079                while(wordsList.hasMoreTokens())
080                    {
081                    final String w = wordsList.nextToken();
082                    if(!ExhibitName.validAttributeWord(w))
083                        {
084                        System.err.println("WARNING: ExhibitAttrUtils: illegal attribute word \"" + w + "\" in properties " + CoreConsts.GLOBAL_PROPS_NAME + ", property name " + ATTR_WORD_LIST_NAME + ".");
085                        continue; // Do not include the word.
086                        }
087                    // Eliminate duplicates.
088                    if(!theseWords.contains(w))
089                        { theseWords.add(MemoryTools.intern(w)); }
090                    }
091    
092                // Extract and return the (unsorted) set/list of attribute words.
093                final String result[] = new String[theseWords.size()];
094                theseWords.toArray(result);
095    if(IsDebug.isDebug) { System.out.println("[ExhibitAttrUtils: attribute words loaded: " + result.length + ".]"); }
096                return(result);
097                }
098            catch(final Exception e)
099                {
100                System.err.println("ERROR: ExhibitAttrUtils: did not find attribute words in properties " + CoreConsts.GLOBAL_PROPS_NAME + ", property name " + ATTR_WORD_LIST_NAME + ".");
101                e.printStackTrace();
102                return(new String[0]);
103                }
104            }
105    
106        /**This class contains exhibit attribute words.
107         * This data is derived from global config data.
108         */
109        public static final class ExhibitAttrWords implements Serializable
110            {
111            /**Construct a set of global (immutable) properties.
112             *
113             * @param attributeWords  set of attribute words for exhibit names;
114             *     need not be sorted,
115             *     may be null or zero length,
116             *     but should not contain any zero-length words
117             *     nor words consisting purely of digits
118             *     nor that contain anything other than digits or lower-case letters
119             */
120            private ExhibitAttrWords(final String attributeWords[])
121                {
122                // Make private copy of array and ensure that words are sorted.
123                // We will check that entries are valid and unduplicated in
124                // validateObject();
125                attrWordsSortedSet = Collections.unmodifiableSortedSet(
126                    new TreeSet<String>(Arrays.asList(attributeWords)));
127                attrWords = new String[attrWordsSortedSet.size()];
128                attrWordsSortedSet.toArray(attrWords);
129    
130                // Now compute our hash over the entries.
131                longHash = computeHash();
132    
133                // Set up our comparators and stubs.
134                _setComparators(attrWordsSortedSet);
135                SUBJECT_ORDER =
136                    (new IndirectComparatorStub(longHash){ // Trivial stub.
137                        @Override
138                        protected final Comparator<Name.ExhibitFull> getComp() { return(_SUBJECT_ORDER); }
139                        private static final long serialVersionUID = -7219799635852371814L;
140                        });
141                SMART_ORDER =
142                    (new IndirectComparatorStub(longHash){ // Trivial stub.
143                        @Override
144                        protected final Comparator<Name.ExhibitFull> getComp() { return(_SMART_ORDER); }
145                        private static final long serialVersionUID = -6487931467358267993L;
146                        });
147    
148                // Verify object state.
149                try { validateObject(); }
150                catch(final InvalidObjectException e)
151                    { throw new IllegalArgumentException(e.getMessage()); }
152                }
153    
154            /**The hash of all the data held; guaranteed non-negative.
155             * Depends all the information held in this object.
156             */
157            public final long longHash;
158    
159            /**The sorted and duplicate-free list of attribute words.
160             * All of these should meet the rules for ExhibitNames.validAttributeWord().
161             * <p>
162             * We are careful never to share a reference to this outside
163             * the class, eg upon deserialisation.
164             * <p>
165             * This is the primary store of attribute words information.
166             */
167            private /* final */ String attrWords[];
168    
169            /**The immutable sorted set of attribute words.
170             * A duplicate of the info in attrWords[].
171             */
172            private transient SortedSet<String> attrWordsSortedSet;
173    
174            /**The sorted and duplicate-free non-null list of attribute words.
175             * This array contains no nulls nor duplicates
176             * and all entries meet the rules for
177             * ExhibitNames.validAttributeWord().
178             * <p>
179             * Creating the return value may be expensive,
180             * requiring extensive copying.
181             *
182             * @return  private copy of attribute-words list
183             */
184            public String[] getAttrWords()
185                {
186                return((attrWords.clone()));
187                }
188    
189            /**The set of attribute words as a view in an immutable SortedSet.
190             * This set contains no nulls
191             * and all entries meet the rules for
192             * ExhibitNames.validAttributeWord().
193             * <p>
194             * Preparing this return value should take a constant, small, time.
195             *
196             * @return  immutable view of attribute-words sorted set
197             */
198            public SortedSet<String> getAttrWordsSortedSet()
199                {
200                return(attrWordsSortedSet);
201                }
202    
203            /**Device to allow serialisation of trivial stubs and do proper quality test.
204             * This allows serialisation to take place,
205             * and also allows equals() to be implemented by comparing the
206             * implementation class and the underlying longHash.
207             */
208            private static abstract class IndirectComparatorStub
209                implements Serializable, Comparator<Name.ExhibitFull>
210                {
211                /**Construct an instance with the hash embedded. */
212                IndirectComparatorStub(final long longHash)
213                    { hash = (int) longHash; }
214    
215                /**Local version of main hash. */
216                private final int hash;
217    
218                /**Get the comparator that we indirectly call. */
219                protected abstract Comparator<Name.ExhibitFull> getComp();
220    
221                /**Give this a fixed ID. */
222                private static final long serialVersionUID = 0x5e0f51a9b80d99ddL;
223    
224                /**Apply the underlying Comparator */
225                public final int compare(final Name.ExhibitFull ex1, final Name.ExhibitFull ex2)
226                    { return(getComp().compare(ex1, ex2)); }
227    
228                /**Compute a condensed version of the main hash. */
229                @Override
230                public int hashCode()
231                    { return(hash); }
232    
233                /**Is comparator equal (ie has some data changed)? */
234                @Override
235                public boolean equals(final Object obj)
236                    {
237                    if(obj == this) { return(true); } // Trival-but-common case.
238    
239                    // Must be a stub comparator...
240                    if(!(obj instanceof IndirectComparatorStub)) { return(false); }
241                    final IndirectComparatorStub other = (IndirectComparatorStub) obj;
242    
243                    // Must be same final (most-derived) anonymous Comparator class.
244                    if(getComp().getClass() != other.getComp().getClass()) { return(false); }
245    
246                    // Must have the same underlying hash
247                    // and therefore the same attribute words.
248                    if(hashCode() != obj.hashCode()) { return(false); }
249    
250                    return(true);
251                    }
252                }
253    
254            /**Internal copy of SUBJECT_ORDER; not serialised. */
255            private transient Comparator<Name.ExhibitFull> _SUBJECT_ORDER;
256    
257            /**Internal copy of SMART_ORDER; not serialised. */
258            private transient Comparator<Name.ExhibitFull> _SMART_ORDER;
259    
260            /**Class to implement our "subject" order Comparator.
261             * TODO: cache of intern()ed main words component (or sets of items with identical main words) to speed up searching in large blocks with same main words
262             */
263            private static final class SubjOrderComp implements Comparator<Name.ExhibitFull>
264                {
265                SubjOrderComp(final Set<String> attrWordsSortedSet)
266                    { this.attrWordsSortedSet = attrWordsSortedSet; }
267    
268                /**The set of valid attribute/discardable words; not null. */
269                private final Set<String> attrWordsSortedSet;
270    
271                public final int compare(final Name.ExhibitFull s1, final Name.ExhibitFull s2)
272                    {
273                    if(s1 == s2) { return(0); }
274    
275                    // Comparison by main-words (stem) component, case-insensitive.
276                    final int mainResult = TextUtils.CASE_INSENSITIVE_ORDER.compare(
277                        s1.getShortName().getMainWordsComponent(attrWordsSortedSet),
278                        s2.getShortName().getMainWordsComponent(attrWordsSortedSet));
279                    if(mainResult != 0) { return(mainResult); }
280    
281                    // Comparison by author (initials) component.
282                    final int authResult = TextUtils.compare(
283                        ExhibitName.getAuthorComponent(s1),
284                        ExhibitName.getAuthorComponent(s2));
285                    if(authResult != 0) { return(authResult); }
286    
287                    // Comparison by number-in-series component.
288                    final int nisResult =
289                        ExhibitName.getNumberInSeriesComponent(s1) -
290                        ExhibitName.getNumberInSeriesComponent(s2);
291                    if(nisResult != 0) { return(nisResult); }
292    
293                    return(0); // May differ only in attribute words.
294                    }
295                };
296    
297            /**Class to implement our "smart" order Comparator. */
298            private static final class SmartOrderComp implements Comparator<Name.ExhibitFull>
299                {
300                SmartOrderComp(final SubjOrderComp subjOC, final Set<String> attrWordsSortedSet)
301                    {
302                    this.subjOC = subjOC;
303                    this.attrWordsSortedSet = attrWordsSortedSet;
304                    }
305                private final SubjOrderComp subjOC;
306                private final Set<String> attrWordsSortedSet;
307    
308                public final int compare(final Name.ExhibitFull s1, final Name.ExhibitFull s2)
309                    {
310                    // Comparison/sort on exhibit subject.
311                    final int subjectResult =  subjOC.compare(s1, s2);
312                    if(subjectResult != 0) { return(subjectResult); }
313    
314                    // Comparison by attribute-words (stem) portion.
315                    CharSequence a1 = ExhibitName.getAttributeWordsComponent(s1, attrWordsSortedSet);
316                    if(a1 == null) { a1 = ""; }
317                    CharSequence a2 = ExhibitName.getAttributeWordsComponent(s2, attrWordsSortedSet);
318                    if(a2 == null) { a2 = ""; }
319                    final int attrResult = TextUtils.compare(a1, a2);
320                    if(attrResult != 0) { return(attrResult); }
321    
322                    // Needs a tie-break to achieve a total order.
323                    return(ExhibitName.SIMPLE_SMART_ORDER.compare(s1, s2));
324                    }
325                };
326    
327            /**Routine to set _SUBJECT_ORDER and _SMART_ORDER.
328             * Called from constructor and readObject().
329             */
330            private void _setComparators(final Set<String> attrWordsSortedSet)
331                {
332                final SubjOrderComp subjO = new SubjOrderComp(attrWordsSortedSet);
333                _SUBJECT_ORDER = subjO;
334                _SMART_ORDER = new SmartOrderComp(subjO, attrWordsSortedSet);
335                }
336    
337            /**A Comparator that sorts exhibits by name comparing as equal those that differ only in attribute words.
338             * This sorts, in order by:
339             * <ol>
340             * <li>main stem of the name case-insensitively
341             * <li>author
342             * <li>number-in-series
343             * </ol>
344             */
345            public final Comparator<Name.ExhibitFull> SUBJECT_ORDER;
346    
347            /**A Comparator that sorts exhibits by name taking account of attribute words.
348             * This sorts, in order by:
349             * <ol>
350             * <li>SUBJECT_ORDER to sort into order by exhibit subject
351             * <li>attribute words to sort within an exhibit subject
352             * <li>ExhibitName.SIMPLE_SMART_ORDER to break ties
353             * </ol>
354             * <p>
355             * equals() is true iff referring underlying comparator's backing AEP
356             * has an unchanged longHash.
357             */
358            public final Comparator<Name.ExhibitFull> SMART_ORDER;
359    
360            /**Compute a hash over all the data in the object; guaranteed non-negative.
361             * Aims to be quick and comprehensive and portable between VMs,
362             * and should not miss even quite subtle changes in the stored data.
363             */
364            private long computeHash()
365                {
366                // Running computed hash value.
367                long result = 0;
368    
369                // Start a message digest (SHA1).
370                final MessageDigest md = GenUtils.getStandardDigest();
371    
372                try {
373                    final DataOutputStream dos = new DataOutputStream(
374                                    new DigestOutputStream(
375                                        (new OutputStream(){ // Null output stream...
376                                            @Override
377                                            public final void write(final int b) { }
378                                            @Override
379                                            public final void write(final byte[] b, final int off, final int len) { }
380                                            }),
381                                        md));
382    
383                    // Put in attr words if any at all.
384                    for(int i = attrWords.length; --i >= 0; )
385                        {
386                        dos.writeUTF(attrWords[i]);
387                        }
388    
389                    dos.flush();
390                    }
391                catch(final IOException e) // Should never happen.
392                    { throw new Error("unexpected internal error"); }
393    
394                // Create the basic hash from the first 8 digest bytes.
395                final byte[] digest = md.digest();
396                long lHash = 0x9fe93d5e4820804bL; // Unique random base value.
397                for(int i = 8; --i >= 0; )
398                    { lHash ^= (((long) (digest[i] & 0xff)) << (8*i)); }
399    
400                // We guarantee that the hash is non-negative.
401                result ^= ((lHash >>> 1) | 1);
402    
403                // Return our computed hash.
404                return(result);
405                }
406    
407             /**Returns a hash code value for the object; derived from the longHash.
408              * Guaranteed zero if there are no exhibits, non-zero otherwise.
409              *
410              * @return  a hash code value for this object.
411              * @see     Object#equals(Object)
412              * @see     Hashtable
413              */
414             @Override
415            public int hashCode()
416                 {
417                 return(((int) (longHash >>> 32)) ^ (int) longHash);
418                 }
419    
420             /**Indicates whether some other object is "equal to" this one; the underlying data is the same if true.
421              * The hashNotChangedSince value is excluded from the comnparison.
422              *
423              * @param   obj   the reference object with which to compare.
424              * @return  <code>true</code> if this object is the same as the obj
425              *          argument; <code>false</code> otherwise.
426              * @see     Boolean#hashCode()
427              * @see     Hashtable
428              */
429             @Override
430            public synchronized boolean equals(final Object obj)
431                 {
432                 // Must be of the same type to be equal.
433                 if(!(obj instanceof ExhibitAttrWords)) { return(false); }
434                 final ExhibitAttrWords eaw = (ExhibitAttrWords) obj;
435    
436                 // We can quickly reject items that don't have the same hash...
437                 if(longHash != eaw.longHash) { return(false); }
438    
439                 // Check the underlying immutable data.
440                 if(!Arrays.equals(attrWords, eaw.attrWords)) { return(false); }
441    
442                 // OK, seems to actually be equal.
443                 return(true);
444                 }
445    
446    
447            /**Validate fields/state.
448             * Called in the constructor and possibly after de-serialising.
449             * <p>
450             * Barf if something bad is found.
451             * (Maybe allow some extra info in debug version.)
452             */
453            public void validateObject()
454                throws InvalidObjectException
455                {
456                // Check that all components are sane and safe.
457                if(longHash < 0)
458                    { throw new InvalidObjectException("bad object: longHash < 0"); }
459                // Check that we can correctly recompute the hash.
460                if(longHash != computeHash())
461                    { throw new InvalidObjectException("bad object: hash wrong"); }
462                if(attrWords == null)
463                    { throw new InvalidObjectException("bad object: attrWords == null"); }
464                for(int i = attrWords.length; --i >= 0; )
465                    {
466                    final String aW = attrWords[i];
467                    if(!ExhibitName.validAttributeWord(aW))
468                        { throw new InvalidObjectException("bad object: bad attrWord " + aW); }
469                    /*
470                    Don't need to check for duplicates or ordering
471                    because we process loaded words via a sorted set
472                    on construction and deserialisation.
473                    if((i > 0) && (aW.compareTo(attrWords[i-1]) <= 0))
474                        { throw new InvalidObjectException("bad object: duplicate or out-of-order attrWord " + aW); }
475                    */
476                    }
477                if(!(SMART_ORDER instanceof IndirectComparatorStub) ||
478                   !(SUBJECT_ORDER instanceof IndirectComparatorStub))
479                    { throw new InvalidObjectException("bad object: comparator wrong"); }
480                if(!(_SMART_ORDER instanceof Comparator<?>) ||
481                   !(_SUBJECT_ORDER instanceof Comparator<?>))
482                    { throw new InvalidObjectException("bad object: internal comparator wrong"); }
483                }
484    
485            /**Deserialise. */
486            private synchronized void readObject(final ObjectInputStream in)
487                throws IOException, ClassNotFoundException
488                {
489                in.defaultReadObject();
490    
491                // Make private copy of attrWords[] and ensure that words are sorted.
492                // We will check that entries are valid in validateObject().
493                attrWordsSortedSet = Collections.unmodifiableSortedSet(
494                    new TreeSet<String>(Arrays.asList(attrWords)));
495                attrWords = new String[attrWordsSortedSet.size()];
496                attrWordsSortedSet.toArray(attrWords);
497    
498                // Set up our comparators.
499                _setComparators(attrWordsSortedSet);
500    
501                validateObject(); // Validate state immediately.
502                }
503    
504            /**Our serial version... */
505            private static final long serialVersionUID = 0xf130cee4509ca259L;
506            }
507    
508        /**Immutable set of exhibit attribute words, comparators, etc; never null. */
509        private static final ExhibitAttrWords attrWords = new ExhibitAttrWords(_getAttributeWords());
510    
511        /**Gets (immutable) set of exhibit attribute words, comparators, etc; never null. */
512        public static final ExhibitAttrWords getAttrWords()
513            { return(attrWords); }
514        }