Coverage details for com.topcoder.document.index.persistence.impl.db.FastAccessDBIndexPersistence

Line  Hits  Source
1 /*
2  * Copyright (C) 2006 TopCoder Inc., All Rights Reserved.
3  */
4 package com.topcoder.document.index.persistence.impl.db;
5  
6 import com.topcoder.db.connectionfactory.DBConnectionFactory;
7 import com.topcoder.document.index.CollectionIndex;
8 import com.topcoder.document.index.DocumentIndex;
9 import com.topcoder.document.index.persistence.IndexPersistenceException;
10 import com.topcoder.document.index.persistence.impl.PersistenceConfigurationException;
11 import com.topcoder.document.index.persistence.impl.Utils;
12 import com.topcoder.document.index.wordsource.WordSourceId;
13  
14 import java.io.BufferedReader;
15 import java.io.IOException;
16 import java.io.Reader;
17 import java.io.Writer;
18 import java.sql.Clob;
19 import java.sql.Connection;
20 import java.sql.PreparedStatement;
21 import java.sql.ResultSet;
22 import java.sql.SQLException;
23 import java.text.CollationKey;
24 import java.text.Collator;
25 import java.util.ArrayList;
26 import java.util.HashMap;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.Locale;
30 import java.util.Map;
31 import java.util.Set;
32  
33  
34 /**
35  * <p>This is an implementation of the AbstractDBIndexPersistence that deals with a fast, CLOB-based, de-normalized
36  * data schema. It is provided for a user who is concerned about execution speed. By storing all the word and
37  * word position data in a single CLOB structure we achieve a 4000% speed increase over a comparable solution that
38  * uses thousands of records to store words and their positions. This class is not thread-safe.</p>
39  *
40  * @author AleaActaEst, TCSDEVELOPER
41  * @version 1.0
42  */
43 public class FastAccessDBIndexPersistence extends AbstractDBIndexPersistence {
44  
45     /**
46      * This constructor will populate the connectionFactory and connectionName information from configuration.
47      *
48      * @param namespace The namespace for connection-related properties
49      *
50      * @throws IllegalArgumentException if namespace param is <tt>null</tt> or an empty (trim'd) String
51      * @throws PersistenceConfigurationException
52      * if there are configuration issues encountered
53      * @see AbstractDBIndexPersistence#AbstractDBIndexPersistence(String)
54      */
55     public FastAccessDBIndexPersistence(final String namespace) throws PersistenceConfigurationException {
56         // arg checking is done in super constructor
57287        super(namespace);
58278    }
59  
60     /**
61      * This constructor will populate the connectionFactory and connectionName information from input parameters.
62      *
63      * @param connectionFactory DBConnectionFactory instance used to generate connections
64      * @param connectionName The name of the connection used for persistence operations
65      *
66      * @throws IllegalArgumentException if connectionFactory is <tt>null</tt>
67      */
68     public FastAccessDBIndexPersistence(final DBConnectionFactory connectionFactory, final String connectionName) {
69         // arg checking is done in super constructor
704        super(connectionFactory, connectionName);
713    }
72  
73     /**
74      * This method adds the given DocumentIndex to the persistence.
75      * <p/>
76      * CS section 1.4.2.1 describes the algorithm of this method.
77      *
78      * @param documentIndex DocumentIndex representing the document to be persisted
79      *
80      * @throws IllegalArgumentException when documentIndex is <tt>null</tt>
81      * @throws IndexPersistenceException when it fails to add the DocumentIndex to the persistence or the given index
82      * is already persisted
83      */
84     public void addDocumentIndex(final DocumentIndex documentIndex) throws IndexPersistenceException {
8536        if (documentIndex == null) {
861            throw new IllegalArgumentException("The parameter named [documentIndex] was null.");
87         }
8835        final Connection connection = getConnection();
8935        beginTransaction(connection);
90  
9135        final String documentId =
92             CommonDatabaseOperations.insertDocumentHeaderData(this, documentIndex.getWordSourceId(), connection);
93  
94         // the validity of these values (CS 1.4.1.3) is guaranteed by the final class
95         // DocumentIndex and its constructor logic
9634        final Map words = documentIndex.getWords();
97  
98         //the delimiter used in the stream to delimit our tokens
9934        final String posDelim = documentIndex.getWordSourceId().getDelimiters()[0];
100  
10134        PreparedStatement preparedStatement = null;
102         try {
103             // This algorithm might seem somewhat cumbersome, but it is the only way to obtain a CLOB instance and
104             // stream the data to the database while avoiding vendor-specific JDBC extensions.
105             // general procedure (a condensed sketch follows this method):
106             // 1. create a record with a non-null clob column value,
107             // 2. query the clob value,
108             // 3. use the clob instance in a new update statement,
109             // 4. stream the data into the clob instance and
110             // 5. execute the update statement
11134            preparedStatement = connection.prepareStatement(
112                 "INSERT INTO WORD_DATA (DOCUMENT_ID,WORD_DATA) VALUES (?,?)");
11334            preparedStatement.setString(1, documentId);
11434            preparedStatement.setString(2, "a");
11534            preparedStatement.executeUpdate();
11634            preparedStatement.close();
11734            preparedStatement = connection.prepareStatement("SELECT WORD_DATA FROM WORD_DATA WHERE DOCUMENT_ID=?",
118                 ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_UPDATABLE);
11934            preparedStatement.setString(1, documentId);
12034            final ResultSet resultSet = preparedStatement.executeQuery();
121             try {
12234                resultSet.next();
12334                final Clob clob = resultSet.getClob(1);
124  
12534                preparedStatement.close();
12634                preparedStatement = connection.prepareStatement(
127                     "UPDATE WORD_DATA SET WORD_DATA=? WHERE DOCUMENT_ID=?");
12834                preparedStatement.setClob(1, clob);
12934                preparedStatement.setString(2, documentId);
13034                final Writer writer = clob.setCharacterStream(0);
131  
13234                final Set wordSet = words.entrySet();
13334                writeWordDataToWriter(wordSet, writer, posDelim);
13434                preparedStatement.executeUpdate();
135             } finally {
13634                resultSet.close();
13734            }
13834        } catch (SQLException e) {
1390            rollbackTransaction(connection);
1400            throw new IndexPersistenceException("Error while inserting CLOB data to database", e);
1410        } catch (IOException e) {
1420            rollbackTransaction(connection);
1430            throw new IndexPersistenceException("Error while inserting CLOB data to database", e);
144         } finally {
1450            if (preparedStatement != null) {
146                 try {
14734                    preparedStatement.close();
1480                } catch (SQLException e) {
149                     // this exception is minor; re-throwing it would either
150                     // mask some other exception or break some otherwise successful operation
15168                }
152             }
15334        }
154  
15534        commitTransaction(connection);
15634    }
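
The five-step procedure commented inside addDocumentIndex can be condensed into the following sketch (an illustration only, not part of the covered source: the class name ClobStreamSketch, the method writeWordData and the serializedWordData parameter are hypothetical, while the WORD_DATA table and columns come from the statements above). Only standard JDBC is used. The covered source passes 0 to Clob.setCharacterStream; the JDBC javadoc documents the position as 1-based, so the sketch passes 1.

    import java.io.IOException;
    import java.io.Writer;
    import java.sql.Clob;
    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;

    // Sketch of the insert-then-stream CLOB pattern; transactions, error handling
    // and full resource cleanup are omitted for brevity.
    public class ClobStreamSketch {
        public static void writeWordData(Connection connection, String documentId,
                                         String serializedWordData) throws SQLException, IOException {
            // 1. create the record with a non-null placeholder so a Clob instance exists to fetch
            PreparedStatement insert = connection.prepareStatement(
                "INSERT INTO WORD_DATA (DOCUMENT_ID,WORD_DATA) VALUES (?,?)");
            insert.setString(1, documentId);
            insert.setString(2, "a");
            insert.executeUpdate();
            insert.close();

            // 2. query the Clob value back from the freshly inserted row
            PreparedStatement select = connection.prepareStatement(
                "SELECT WORD_DATA FROM WORD_DATA WHERE DOCUMENT_ID=?",
                ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_UPDATABLE);
            select.setString(1, documentId);
            ResultSet resultSet = select.executeQuery();
            resultSet.next();
            Clob clob = resultSet.getClob(1);

            // 3.-5. bind the Clob to an UPDATE, stream the data into it, then execute the update
            PreparedStatement update = connection.prepareStatement(
                "UPDATE WORD_DATA SET WORD_DATA=? WHERE DOCUMENT_ID=?");
            update.setClob(1, clob);
            update.setString(2, documentId);
            Writer writer = clob.setCharacterStream(1); // position 1 is the first character per the JDBC javadoc
            writer.write(serializedWordData);
            writer.flush();
            writer.close();
            update.executeUpdate();

            resultSet.close();
            select.close();
            update.close();
        }
    }
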
157  
158     /**
159      * This method retrieves the DocumentIndex with the specified WordSourceId from the persistence. It returns
160      * <tt>null</tt> if no DocumentIndex with the given WordSourceId is found in the persistence.
161      * <p/>
162      * CS section 1.4.1.1 describes the algorithm of this method.
163      *
164      * @param wordSourceId WordSourceId of document to retrieve
165      *
166      * @return retrieved DocumentIndex, or <tt>null</tt> if document index with given WordSourceId is not found in the
167      * persistence
168      *
169      * @throws IllegalArgumentException when wordSourceId is <tt>null</tt>
170      * @throws IndexPersistenceException when it fails to retrieve the document index with the specified WordSourceId
171      */
172     public DocumentIndex getDocumentIndex(final WordSourceId wordSourceId)
173         throws IndexPersistenceException {
17425        if (wordSourceId == null) {
1751            throw new IllegalArgumentException("The parameter named [wordSourceId] was null.");
176         }
177  
17824        final String documentId = Utils.createIdString(wordSourceId);
179  
18024        final Object[] queryArgsDocumentId = new Object[]{documentId};
181  
18224        final Connection connection = getConnection();
18324        final Integer count = (Integer) DatabaseUtils.doSingleValueQuery(connection, this,
184             "SELECT COUNT(*) FROM DOCUMENT WHERE DOCUMENT_ID=?", queryArgsDocumentId,
185             DatabaseUtils.INTEGER_TYPE);
18624        if (count.intValue() == 0) {
1872            DatabaseUtils.closeSilently(connection);
1882            return null;
189         }
190  
19122        final Locale locale = wordSourceId.getSourceLocale();
19222        final Collator collator = Collator.getInstance(locale);
193  
194         final Map words;
19522        final String delim = wordSourceId.getDelimiters()[0];
196  
197         try {
19822            final PreparedStatement preparedStatement = connection.prepareStatement(
199                 "SELECT WORD_DATA FROM WORD_DATA WHERE DOCUMENT_ID=?");
20022            preparedStatement.setString(1, documentId);
20122            final ResultSet resultSet = preparedStatement.executeQuery();
202             try {
20322                resultSet.next();
20422                final Reader characterStream = resultSet.getClob(1).getCharacterStream();
20522                words = readWordDataFromReader(characterStream, delim, collator);
206             } finally {
20722                resultSet.close();
20822            }
2090        } catch (SQLException e) {
2100            rollbackTransaction(connection);
2110            throw new IndexPersistenceException("Error while reading CLOB data from database", e);
2120        } catch (IOException e) {
2130            rollbackTransaction(connection);
2140            throw new IndexPersistenceException("Error while reading CLOB data from database", e);
21522        }
21622        DatabaseUtils.closeSilently(connection);
21722        return new DocumentIndex(wordSourceId, words);
218     }
219  
220     /**
221      * This method removes the document index with the given WordSourceId from the persistence. An exception is thrown
222      * when the document index is not found, its use count is greater than zero, or the removal fails.
223      * <p/>
224      * CS section 1.4.1.2 describes the algorithm of this method.
225      *
226      * @param wordSourceId WordSourceId of document index to remove
227      *
228      * @throws IllegalArgumentException when WordSourceId is <tt>null</tt>
229      * @throws IndexPersistenceException when the document index is not found in the persistence, its use count is not
230      * zero, or an error happens when trying to remove it
231      */
232     public void removeDocumentIndex(final WordSourceId wordSourceId) throws IndexPersistenceException {
233         //arg checking is done inline; the use count check and the deletion follow below
2344        if (wordSourceId == null) {
2351            throw new IllegalArgumentException("The parameter named [wordSourceId] was null.");
236         }
237  
2383        final String documentId = Utils.createIdString(wordSourceId);
239  
2403        final Connection connection = getConnection();
2413        final Object[] documentIdQueryArg = new Object[]{documentId};
2423        final int count = getDocumentUseCount(wordSourceId);
2432        if (count != 0) {
2441            throw new IndexPersistenceException("The index with the given id [" + wordSourceId
245                 + "] cannot be deleted as its use count is not 0, but is " + count + ".");
246         }
247  
2481        beginTransaction(connection);
249  
2501        DatabaseUtils.doDMLQuery(connection, this, "DELETE FROM WORD_DATA WHERE DOCUMENT_ID=?",
251             documentIdQueryArg);
2521        DatabaseUtils.doDMLQuery(connection, this, "DELETE FROM DELIMITER WHERE DOCUMENT_ID=?",
253             documentIdQueryArg);
2541        DatabaseUtils.doDMLQuery(connection, this, "DELETE FROM DOCUMENT WHERE DOCUMENT_ID=?",
255             documentIdQueryArg);
256         // the DOCUMENT_COLLECTION_DOCUMENT_XREF table remains untouched, as the indexer component ensures that no
257         // matching entry exists in that table when the use count of the document is 0
258  
2591        commitTransaction(connection);
2601    }
261  
262     /**
263      * This method stores the given document collection index in the persistence.
264      * <p/>
265      * CS section 1.4.3.1 describes the algorithm of this method.
266      *
267      * @param collectionIndex CollectionIndex to store
268      *
269      * @throws IllegalArgumentException if collectionIndex is <tt>null</tt>
270      * @throws IndexPersistenceException if it fails to store the document collection index or the given index is
271      * already persisted
272      */
273     public void addCollectionIndex(final CollectionIndex collectionIndex)
274         throws IndexPersistenceException {
275         //arg checking and everything else is in common method
2769        CommonDatabaseOperations.addCollectionIndex(this, collectionIndex);
2776    }
278  
279     /**
280      * This method retrieves the document collection index with the specified identifier. It returns <tt>null</tt> when
281      * the collection with the specified identifier is not found.
282      * <p/>
283      * CS section 1.4.3.2 describes the algorithm of this method.
284      *
285      * @param collectionId identifier of document collection index to retrieve
286      *
287      * @return CollectionIndex with specified identifier or <tt>null</tt> if the collection index with the given
288      * identifier was not found in persistence
289      *
290      * @throws IllegalArgumentException when collectionId is <tt>null</tt> or an empty (trim'd) string
291      * @throws IndexPersistenceException when the collection index cannot be retrieved
292      */
293     public CollectionIndex getCollectionIndex(final String collectionId) throws IndexPersistenceException {
294         //arg checking and everything else is in common method
29510        return CommonDatabaseOperations.getCollectionIndex(this, collectionId);
296     }
297  
298     /**
299      * This method removes the document collection index with the specified identifier from the persistence.
300      * <p/>
301      * CS section 1.4.3.4 describes the algorithm of this method.
302      *
303      * @param collectionId identifier of document collection index to remove
304      *
305      * @throws IllegalArgumentException when collectionId is <tt>null</tt> or empty (trim'd) string
306      * @throws IndexPersistenceException when it fails to remove the CollectionIndex with the given identifier from the
307      * database, or when no collection with that identifier exists in the database
308      */
309     public void removeCollectionIndex(final String collectionId) throws IndexPersistenceException {
310         //arg checking and everything else is in common method
3114        CommonDatabaseOperations.removeCollectionIndex(this, collectionId);
3121    }
313  
314     /**
315      * This method updates the specified CollectionIndex in the persistence.
316      * <p/>
317      * CS section 1.4.3.3 describes the algorithm of this method.
318      *
319      * @param collectionIndex CollectionIndex to update
320      *
321      * @throws IllegalArgumentException if collectionIndex parameter is <tt>null</tt>
322      * @throws IndexPersistenceException if any error happens when updating the collection index in the persistence;
323      * this includes the case when the specified collectionIndex is not found in the
324      * persistence
325      */
326     public void updateCollectionIndex(final CollectionIndex collectionIndex) throws IndexPersistenceException {
327         //arg checking and everything else is in common method
3283        CommonDatabaseOperations.updateCollectionIndex(this, collectionIndex);
3291    }
330  
331     /**
332      * This method increases the use count value for document index with specified WordSourceId by 1.
333      *
334      * @param wordSourceId WordSourceId of document index of which to update use count
335      *
336      * @throws IllegalArgumentException if wordSourceId is <tt>null</tt>
337      * @throws IndexPersistenceException when it fails to increase the document index use count in the persistence or
338      * the document with the given wordSourceId does not exist in the persistence
339      */
340     public void increaseDocumentUseCount(final WordSourceId wordSourceId) throws IndexPersistenceException {
341         //arg checking and everything else is in common method
3427        CommonDatabaseOperations.increaseDocumentUseCount(this, wordSourceId, 1);
3435    }
344  
345     /**
346      * This method decreases the use count value for document index with specified WordSourceId by 1.
347      *
348      * @param wordSourceId WordSourceId of document index of which to update use count
349      *
350      * @throws IllegalArgumentException if wordSourceId is <tt>null</tt>
351      * @throws IndexPersistenceException when it fails to decrease the document index use count in the persistence or
352      * the document with the given wordSourceId does not exist in the persistence
353      */
354     public void decreaseDocumentUseCount(final WordSourceId wordSourceId) throws IndexPersistenceException {
355         //arg checking and everything else is in common method
3566        CommonDatabaseOperations.increaseDocumentUseCount(this, wordSourceId, -1);
3574    }
358  
359     /**
360      * This method returns the use count of the document index with the given id.
361      *
362      * @param wordSourceId WordSourceId of document index of which to retrieve use count
363      *
364      * @return the use count of the document with the given id
365      *
366      * @throws IllegalArgumentException if wordSourceId is <tt>null</tt>
367      * @throws IndexPersistenceException when it fails to retrieve the document index use count from the persistence or
368      * the document with the given id does not exist in the persistence
369      */
370     public int getDocumentUseCount(final WordSourceId wordSourceId) throws IndexPersistenceException {
371         //arg checking and everything else is in common method
37218        return CommonDatabaseOperations.getDocumentUseCount(this, wordSourceId);
373     }
374  
375     /**
376      * This method returns the set of WordSourceIds of the documents that exist in this persistence instance.
377      * <p/>
378      * CS section 1.4.1.1 describes the algorithm of this method.
379      *
380      * @return set of WordSourceId of documents that exist in this persistence
381      *
382      * @throws IndexPersistenceException when the retrieval fails
383      */
384     public Set getIndexedDocuments() throws IndexPersistenceException {
385         //arg checking and everything else is in common method
3863        return CommonDatabaseOperations.getIndexedDocuments(this);
387     }
388  
389     /**
390      * This method writes the word data from the given set to the given writer using the given delimiter.
391      *
392      * @param wordSet the set of words to be written as a set of {@link java.util.Map.Entry} having a {@link
393      * CollationKey} as key and a {@link List} of {@link Integer}s as values
394      * @param writer the writer to write to
395      * @param posDelim the delimiter to be used to separate entries in stream
396      *
397      * @throws IOException in case some I/O operation fails
398      */
399     private void writeWordDataToWriter(final Set wordSet, final Writer writer, final String posDelim)
400         throws IOException {
40134        for (Iterator iterator = wordSet.iterator(); iterator.hasNext();) {
4021096            final Map.Entry entry = (Map.Entry) iterator.next();
403  
404             // write the word
4051096            final String word = ((CollationKey) entry.getKey()).getSourceString();
406  
407             // write out all positions
4081096            final List indices = (List) entry.getValue();
4091096            writer.write(word);
4101096            writer.write(posDelim);
4111096            for (Iterator iterator1 = indices.iterator(); iterator1.hasNext();) {
4122288                final Integer index = (Integer) iterator1.next();
4132288                writer.write(index.toString());
4142288                writer.write(posDelim);
415             }
416             //end the record of the current word by writing second delimiter
4171096            writer.write(posDelim);
418         }
41934        writer.flush();
42034        writer.close();
42134    }
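
As a concrete illustration of the record layout written above: with "|" as the position delimiter, a word "cat" occurring at positions 2 and 7 is serialized as cat|2|7|| — the word, the delimiter, each position followed by the delimiter, and one extra delimiter to close the record. A minimal, self-contained sketch of that layout follows (illustration only; the class name WordDataFormatDemo and the "|" delimiter are hypothetical — the real delimiter comes from WordSourceId.getDelimiters()[0]):

    import java.io.StringWriter;
    import java.io.Writer;

    // Mirrors the per-word layout produced by writeWordDataToWriter.
    public class WordDataFormatDemo {
        public static void main(String[] args) throws Exception {
            final String posDelim = "|";
            final String word = "cat";
            final int[] positions = new int[]{2, 7};

            final Writer writer = new StringWriter();
            writer.write(word);
            writer.write(posDelim);
            for (int i = 0; i < positions.length; i++) {
                writer.write(Integer.toString(positions[i]));
                writer.write(posDelim);
            }
            // the second consecutive delimiter closes the record for this word
            writer.write(posDelim);
            writer.flush();

            System.out.println(writer); // prints: cat|2|7||
        }
    }

readWordDataFromReader below reverses this layout: given the stream cat|2|7||dog|1|| and the same delimiter, it produces a map with collation keys for "cat" and "dog" mapped to the position lists [2, 7] and [1].
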
422  
423     /**
424      * This method reads the serialized word data (as written by {@link #writeWordDataToWriter(java.util.Set,
425      * java.io.Writer, String)}) back into a map containing keys of type {@link CollationKey} and values that are {@link
426      * List}s of {@link Integer}s.
427      *
428      * @param characterStream the stream to read from
429      * @param delim the delimiter to be used to determine entry boundaries
430      * @param collator the collator to be used to convert Strings to {@link java.text.CollationKey}s
431      *
432      * @return the map containing the data parsed from the given stream
433      *
434      * @throws IOException in case some I/O operation fails
435      * @throws IndexPersistenceException in case unexpected data is encountered in the stream
436      */
437     private Map readWordDataFromReader(final Reader characterStream, final String delim, final Collator collator) throws
438         IOException, IndexPersistenceException {
43922        final Map words = new HashMap();
44022        final BufferedReader bufferedReader = new BufferedReader(characterStream);
44122        int c = -1;
442         // the following code is a simple state machine that finds the occurrences of
443         // delimiters and puts the data into the words map
444  
44522        final char[] delimChars = delim.toCharArray();
446         // the string buffer collects all characters encountered since last delimiter occurrence
44722        StringBuffer item = new StringBuffer();
448         // the index of how many of the last characters matched the delimiter sequence,
449         // when this gets as large as delimChars.length, a delimiter has been found
45022        int delimIdx = 0;
451         // the current word, null until the word end delimiter has been found,
452         // if not null indicates that values read are indices for word
45322        String word = null;
454  
455         // the indices for the current word
45622        List indices = new ArrayList();
457  
458         // state flag signaling that one delimiter has been fully read, if the delimiter is
459         // then immediately followed by a second delimiter, the word entry is over
460         // and word plus indices are put to map
46122        boolean haveOneDelim = false;
46213745        while ((c = bufferedReader.read()) != -1) {
46313723            item.append((char) c);
464  
46513723            if (c == delimChars[delimIdx]) {
4664301                delimIdx++; //match next delim char
467             } else {
4689422                delimIdx = 0; //does not match delim, so reset
4699422                haveOneDelim = false;
470             }
471  
47213723            if (delimIdx == delimChars.length) {
473                 // delim has been matched
4744301                final String entry = item.toString().substring(0, item.length() - delimChars.length);
4754301                if (word == null) {
4761061                    word = entry;
4773240                } else if (haveOneDelim) {
478                     //second delim matched, so word entry is full
4791060                    final CollationKey key = collator.getCollationKey(word);
4801060                    words.put(key, indices);
4811060                    indices = new ArrayList();
4821060                    word = null;
483                 } else {
484                     try {
4852180                        indices.add(Integer.valueOf(entry.trim()));
4860                    } catch (NumberFormatException e) {
4870                        throw new IndexPersistenceException(
488                             "Unable to parse a index string [" + entry + "] into a valid int.", e);
4892180                    }
490                 }
4914301                item = new StringBuffer();
4924301                delimIdx = 0;
4934301                haveOneDelim = true;
494             }
495         }
49622        if (item.length() > 0) {
4970            throw new IndexPersistenceException("Illegal end of stream detected during query");
498         }
49922        return words;
500     }
501 }
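
For orientation, a minimal usage sketch of the class (illustration only; the class name FastAccessUsageSketch and the method storeAndReload are hypothetical, and the DBConnectionFactory, connection name and DocumentIndex are assumed to be created elsewhere):

    import com.topcoder.db.connectionfactory.DBConnectionFactory;
    import com.topcoder.document.index.DocumentIndex;
    import com.topcoder.document.index.persistence.IndexPersistenceException;
    import com.topcoder.document.index.persistence.impl.db.FastAccessDBIndexPersistence;

    // Illustrates only the persistence calls covered in this report.
    public class FastAccessUsageSketch {
        public static DocumentIndex storeAndReload(DBConnectionFactory factory, String connectionName,
                                                   DocumentIndex index) throws IndexPersistenceException {
            final FastAccessDBIndexPersistence persistence =
                new FastAccessDBIndexPersistence(factory, connectionName);
            persistence.addDocumentIndex(index);
            // getDocumentIndex returns null when no index with the given WordSourceId is persisted
            return persistence.getDocumentIndex(index.getWordSourceId());
        }
    }
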

this report was generated by version 1.0.5 of jcoverage.