+ new Lock.With(directory.makeLock("my.lock")) {
+   public Object doBody() {
+     ... code to execute while locked ...
+   }
+ }.run();
-
yyyyMMddHHmmssSSS
or shorter,
- depending on resolution
- yyyyMMddHHmmssSSS
or shorter,
- depending on resolution
; using UTC as timezone
- timeToString
or
- DateToString
back to a time, represented as the
- number of milliseconds since January 1, 1970, 00:00:00 GMT.
-
- dateString
is not in the timeToString
or
- DateToString
back to a time, represented as a
- Date object.
-
- dateString
is not in the 2004-09-21 13:50:11
- will be changed to 2004-09-01 00:00:00
when using
- Resolution.MONTH
.
-
- resolution
- set to 0 or 1
- 1095767411000
- (which represents 2004-09-21 13:50:11) will be changed to
- 1093989600000
(2004-09-01 00:00:00) when using
- Resolution.MONTH
.
-
- resolution
- set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT
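As a hedged illustration of the conversions described above (a minimal sketch assuming the Lucene DateTools API; the concrete values are only examples):

  long time = 1095767411000L;                                            // 2004-09-21 13:50:11 GMT
  String s = DateTools.timeToString(time, DateTools.Resolution.SECOND);  // "20040921135011"
  long back = DateTools.stringToTime(s);                                 // milliseconds since epoch again (throws ParseException)
  long month = DateTools.round(time, DateTools.Resolution.MONTH);        // first millisecond of the month, per Resolution.MONTH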
- Adds a field to a document. Several fields may be added with - the same name. In this case, if the fields are indexed, their text is - treated as though appended for the purposes of search.
-Note that add like the removeField(s) methods only makes sense - prior to adding a document to an index. These methods cannot - be used to change the content of an existing index! In order to achieve this, - a document has to be deleted from an index and a new changed version of that - document has to be added.
-Removes field with the specified name from the document. - If multiple fields exist with this name, this method removes the first field that has been added. - If there is no field with the specified name, the document remains unchanged.
-Note that the removeField(s) methods like the add method only make sense - prior to adding a document to an index. These methods cannot - be used to change the content of an existing index! In order to achieve this, - a document has to be deleted from an index and a new changed version of that - document has to be added.
-Removes all fields with the given name from the document. - If there is no field with the specified name, the document remains unchanged.
-Note that the removeField(s) methods like the add method only make sense - prior to adding a document to an index. These methods cannot - be used to change the content of an existing index! In order to achieve this, - a document has to be deleted from an index and a new changed version of that - document has to be added.
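A hedged sketch of that workflow (assumes an already open IndexWriter named writer; the field names are made up):

  Document doc = new Document();
  doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(new Field("body", "first part", Field.Store.NO, Field.Index.ANALYZED));
  doc.add(new Field("body", "second part", Field.Store.NO, Field.Index.ANALYZED)); // same name: treated as appended for search
  doc.removeField("body");                          // removes only the first "body" field added above
  writer.updateDocument(new Term("id", "42"), doc); // delete the old version of the document and add the changed one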
-null
.
-
- Field[]
array
- null
.
-
- String[]
of field values
- null
if no
- binary fields with the specified name are available.
-
- byte[][]
of binary field values.
- null
- if no binary fields with the specified name are available.
- There may be non-binary fields with the same name.
-
- byte[]
containing the binary field value.
- value
should be stored in the index
-
- Whether the field should be indexed, and if so, if it should
- be tokenized before indexing
-
- null
value
should be stored in the index
-
- Whether the field should be indexed, and if so, if it should
- be tokenized before indexing
-
- Whether term vector should be stored
-
- null
TermVector.YES
null
null
value
should be stored (compressed or not.)
-
- Store.NO
stored
attribute instead.
-
- dir
or name
is null file
is the string by which the
- sub-stream will be known in the compound stream.
-
- file
is null FilterIndexReader
contains another IndexReader, which it
- uses as its basic source of data, possibly transforming the data along the
- way or providing additional functionality. The class
- FilterIndexReader
itself simply implements all abstract methods
- of IndexReader
with versions that pass all requests to the
- contained index reader. Subclasses of FilterIndexReader
may
- further override some of these methods and may also provide additional
- methods and fields.
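A minimal sketch of such a subclass (the class name and the choice of overridden method are illustrative only):

  class DelegatingReader extends FilterIndexReader {
    DelegatingReader(IndexReader in) {
      super(in);               // every request is passed to 'in' unless overridden
    }
    public int numDocs() {
      return in.numDocs();     // a real subclass would transform or filter here
    }
  }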
- true
if an index exists at the specified directory.
- If the directory does not exist or if there is no index in it,
- false
is returned.
- true
if an index exists; false
otherwise
- true
if an index exists at the specified directory.
- If the directory does not exist or if there is no index in it.
- true
if an index exists; false
otherwise
- true
if an index exists at the specified directory.
- If the directory does not exist or if there is no index in it.
- true
if an index exists; false
otherwise
- n
th
- Document
in this index.
- t
. docNum
. Once a document is
- deleted it will not appear in TermDocs or TermPositions enumerations.
- Attempts to read its field with the {@link #document}
- method will result in an error. The presence of this document may still be
- reflected in the {@link #docFreq} statistic, though
- this will be corrected eventually as the index is further modified.
- docNum
.
- Applications should call {@link #DeleteDocument(int)} or {@link #DeleteDocuments(Term)}.
- term
.
- This is useful if one uses a document field to hold a unique ID string for
- the document. Then to delete such a document, one merely constructs a
- term with the appropriate field and the unique ID string as its text and
- passes it to this method.
- See {@link #Delete(int)} for information about when this deletion will
- become effective.
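For example, a hedged sketch of deleting by such a unique ID field (the field name is hypothetical):

  int deleted = reader.deleteDocuments(new Term("id", "doc-123")); // returns the number of documents deleted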
- true
iff the index in the named directory is
- currently locked.
- true
iff the index in the named directory is
- currently locked.
- Construct a FilterIndexReader based on the specified base reader. - Directory locking for delete, undeleteAll, and setNorm operations is - left to the base reader.
-Note that base reader is closed if this FilterIndexReader is closed.
+*
, which
+ matches any character sequence (including the empty one), and ?
,
+ which matches any single character. Note this query can be slow, as it
+ needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
+ a Wildcard term should not start with one of the wildcards *
or
+ ?
.
+
+ This query uses the {@link
+ MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ rewrite method.
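A short sketch (the field name is illustrative):

  // Matches terms such as "test", "text" or "texture" in the "body" field.
  // The term does not begin with a wildcard, which keeps the term enumeration cheap.
  Query q = new WildcardQuery(new Term("body", "te?t*"));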
.f
+ a number.
- Also note that two of Lucene's files (deletable
and
- segments
) don't have any filename extension.
- true
to create the index or overwrite the existing one;
- false
to append to the existing index
-
- true
to create the index or overwrite the existing one;
- false
to append to the existing index
-
- true
to create the index or overwrite the existing one;
- false
to append to the existing index
-
- b
. Documents
+ matching this clause will (in addition to the normal weightings) have
+ their score multiplied by b
.
+ b
. The boost is 1.0 by default.
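A small sketch of setting a clause boost (field and value are illustrative):

  TermQuery tq = new TermQuery(new Term("title", "lucene"));
  tq.setBoost(2.0f); // documents matching this clause get their score multiplied by 2.0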
+ field
assumed to be the
+ default field and omitted.
+ The representation used is one that is supposed to be readable
+ by {@link Lucene.Net.QueryParsers.QueryParser QueryParser}. However,
+ there are the following limitations:
+ term
.
- This is useful if one uses a document field to hold a unique ID string for
- the document. Then to delete such a document, one merely constructs a
- term with the appropriate field and the unique ID string as its text and
- passes it to this method. Returns the number of documents deleted.
+ term
.docNum
.Scorer
iterates over documents matching a
+ query in increasing order of doc Id.
+
+
+ Document scores are computed using a given Similarity
+ implementation.
+
+
+ NOTE: The values Float.NaN,
+ Float.NEGATIVE_INFINITY and Float.POSITIVE_INFINITY are
+ not valid scores. Certain collectors (e.g. {@link
+ TopScoreDocCollector}) will not properly collect hits
+ with these scores.
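A hedged sketch of consuming a Scorer through the iterator contract described above (how the scorer was obtained is omitted):

  int doc;
  while ((doc = scorer.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    float score = scorer.score(); // a regular float; NaN and infinities are not valid here
    // ... collect (doc, score) ...
  }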
numUniqueTerms/interval
terms are read into
- memory by an IndexReader, and, on average, interval/2
terms
- must be scanned for each random term access.
+ path
.
- Text will be analyzed with a
. If create
- is true, then a new, empty index will be created in
- path
, replacing the index already there, if any.
+ true
to create the index or overwrite
- the existing one; false
to append to the existing
- index
-
- create
is
- false
- path
.
- Text will be analyzed with a
. If create
- is true, then a new, empty index will be created in
- path
, replacing the index already there, if any.
+ true
to create the index or overwrite
- the existing one; false
to append to the existing
- index
-
- create
is
- false
- d
.
- Text will be analyzed with a
. If create
- is true, then a new, empty index will be created in
- d
, replacing the index already there, if any.
+ NOTE: in 3.0 this method will become abstract, following the removal
+ of {@link #Next()}. For backward compatibility it is implemented as:
- true
to create the index or overwrite
- the existing one; false
to append to the existing
- index
-
- create
is
- false
- After this completes, the index is optimized.
-The provided IndexReaders are not closed.
-MultipleTermPositions
here.
-
- MultipleTermPositions
instance.
-
- - { pq.top().change(); pq.adjustTop(); } -instead of
- { o = pq.pop(); o.change(); pq.push(o); } ++ public int nextDoc() throws IOException { + return next() ? doc() : NO_MORE_DOCS; + }-
Construct a MultiReader aggregating the named set of (sub)readers. - Directory locking for delete, undeleteAll, and setNorm operations is - left to the subreaders.
-Note that all subreaders are closed if this MultiReader is closed.
++ int advance(int target) { + int doc; + while ((doc = nextDoc()) < target) { + } + return doc; + } ++ + Some implementations are considerably more efficient than that. + + NOTE: certain implementations may return a different value (each + time) if called several times in a row with the same target. + + NOTE: this method may be called with {@value #NO_MORE_DOCS} for + efficiency by some Scorers. If your implementation cannot efficiently + determine that it should exhaust, it is recommended that you check for that + value in each call to this method. + + NOTE: after the iterator is exhausted you should not call this + method, as it may result in unpredictable behavior. + + NOTE: in 3.0 this method will become abstract, following the removal + of {@link #SkipTo(int)}. +
Similarity
implementation used by this scorer.
- firstDocID
is added to ensure that {@link #NextDoc()}
+ was called before this method.
+
+ Scorer
implements {@link Scorer#SkipTo(int)}.
+ ReqOptScorer
.getFloats()
and makes those values
+ available as other numeric types, casting as needed.
+
+
+ WARNING: The status of the Search.Function package is experimental.
+ The APIs introduced here might change in the future and will not be
+ supported anymore in such a case.
Zero
value.
+
+
+ WARNING: The status of the Search.Function package is experimental.
+ The APIs introduced here might change in the future and will not be
+ supported anymore in such a case.
+
+ NOTE: with the switch in 2.9 to segment-based
+ searching, if {@link #getValues} is invoked with a
+ composite (multi-segment) reader, this can easily cause
+ double RAM usage for the values in the FieldCache. It's
+ best to switch your application to pass only atomic
+ (single segment) readers to this API. Alternatively, for
+ a short-term fix, you could wrap your ValueSource using
+ {@link MultiValueSource}, which costs more CPU per lookup
+ but will not consume double the FieldCache RAM.
Float.NaN
if this
+ DocValues instance does not contain any value.
+
+ This operation is optional
+
+
+ Float.NaN
if this
+ DocValues instance does not contain any value.
+ Float.NaN
if this
+ DocValues instance does not contain any value.
+
+ This operation is optional
+
+
+ Float.NaN
if this
+ DocValues instance does not contain any value.
+ Float.NaN
if this
+ DocValues instance does not contain any value. *
+
+ This operation is optional
+
+
+ Float.NaN
if this
+ DocValues instance does not contain any value
+ o
is equal to this. Weight
is used in the following way:
+ Weight
is constructed by a top-level query, given a
+ Searcher
({@link Query#CreateWeight(Searcher)}).Weight
to compute the query normalization factor
+ {@link Similarity#QueryNorm(float)} of the query clauses contained in the
+ query.Scorer
is constructed by {@link #Scorer(IndexReader,boolean,boolean)}.scoreDocsInOrder
.
+
+ NOTE: even if scoreDocsInOrder
is false, it is
+ recommended to check whether the returned Scorer
indeed scores
+ documents out of order (i.e., call {@link #ScoresDocsOutOfOrder()}), as
+ some Scorer
implementations will always return documents
+ in-order.false
, i.e.
+ the Scorer
scores documents in-order.
+ type
param tells how to parse the field string values into a numeric score value.
+ smis
- contains segments that are positioned at the same term. N
- is the number of cells in the array actually occupied.
+ null
or
+ {@linkplain #EMPTY_DOCIDSET}.iterator()
if there
+ are no docs that match.
+ DocIdSet
+ should be cached without copying it into a BitSet. The default is to return
+ false
. If you have an own DocIdSet
implementation
+ that does its iteration very efficiently without doing disk I/O,
+ override this method and return true.
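Purely as an illustration, a trivial Filter whose DocIdSet never touches disk and is therefore safe to cache:

  public class MatchNothingFilter extends Filter {
    public DocIdSet getDocIdSet(IndexReader reader) {
      return DocIdSet.EMPTY_DOCIDSET; // its iterator() yields no documents
    }
  }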
+ getTerms
- method. Each location in the array contains the number of times this
- term occurs in the document or the document field.
- getTerms
at which the term with the specified
- term
appears. If this term does not appear in the array,
- return -1.
- indexOf(int)
but searches for a number of terms
- at the same time. Returns an array that has the same size as the number
- of terms searched for, each slot containing the result of searching for
- that term number.
-
- TermFreqVector
to provide additional information about
- positions in which each of the terms is found. A TermPositionVector does not necessarily
- contain both positions and offsets, but at least one of these arrays exists.
- indexOf
method.
- May return null if positions have not been stored.
- indexOf
method.
-
- for each document
- {
- writer.openDocument();
- for each field on the document
- {
- writer.openField(field);
- for all of the terms
- {
- writer.addTerm(...)
- }
- writer.closeField()
- }
- writer.closeDocument()
- }
-
-
- OR_OPERATOR
) terms without any modifiers
- are considered optional: for example capital of Hungary
is equal to
- capital OR of OR Hungary
.AND_OPERATOR
mode terms are considered to be in conjunction: the
- above mentioned query is parsed as capital AND of AND Hungary
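A hedged sketch of the two modes (the constructor form and analyzer are placeholders; parse() throws ParseException):

  QueryParser parser = new QueryParser("body", new StandardAnalyzer());
  parser.parse("capital of Hungary");                  // OR_OPERATOR (default): capital OR of OR Hungary
  parser.setDefaultOperator(QueryParser.AND_OPERATOR);
  parser.parse("capital of Hungary");                  // now parsed as capital AND of AND Hungary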
- true
.
- \
.
- It will, when parse(String query)
- is called, construct a query like this (assuming the query consists of
- two terms and you specify the two fields title
and body
):
- (title:term1 body:term1) (title:term2 body:term2)
-
-
- When setDefaultOperator(AND_OPERATOR) is set, the result will be:
- -
- +(title:term1 body:term1) +(title:term2 body:term2)
-
-
- In other words, all the query's terms must appear, but it doesn't matter in - what fields they appear.
-b
. Documents
- matching this clause will (in addition to the normal weightings) have
- their score multiplied by b
.
- b
. The boost is 1.0 by default.
- match
whose end
- position is less than or equal to end
.
- slop
total unmatched positions between
- them. When inOrder
is true, the spans from each clause
- must be ordered as in clauses
.
- o
is equal to this. include
which
- have no overlap with spans from exclude
.
- o
is equal to this. Similarity
implementation used by this scorer.
-
- o
is equal to this. o
is equal to this. SHOULD
- subqueries, at least one of the queries must appear in the matching documents.
- MUST_NOT
query.
- - By default no optional clauses are necessary for a match - (unless there are no required clauses). If this method is used, - then the specified number of clauses is required. -
-- Use of this method is totally independent of specifying that - any specific clauses are required (or prohibited). This number will - only be compared against the number of matching optional clauses. -
-- EXPERT NOTE: Using this method will force the use of BooleanWeight2, - regardless of whether setUseScorer14(true) has been called. -
- -o
is equal to this. ConjunctionScorer
.
- This Scorer implements {@link Scorer#SkipTo(int)} and uses skipTo() on the given Scorers.
- DisjunctionScorer
, using one as the minimum number
- of matching subscorers.
- scorerQueue
.
- PriorityQueue
that orders by {@link Scorer#Doc()}. o
is equal to this. true
if the lower endpoint is inclusive true
if the upper endpoint is inclusive o
is equal to this. 1/sqrt(numTerms)
. 1/sqrt(sumOfSquaredWeights)
. sqrt(freq)
. 1 / (distance + 1)
. log(numDocs/(docFreq+1)) + 1
. overlap / maxOverlap
. field
as integers and returns an array
- of size reader.maxDoc()
of the value each document
- has in the given field.
- field
as integers and returns an array of
- size reader.maxDoc()
of the value each document has in the
- given field.
- field
as floats and returns an array
- of size reader.maxDoc()
of the value each document
- has in the given field.
- field
as floats and returns an array
- of size reader.maxDoc()
of the value each document
- has in the given field.
- field
and returns an array
- of size reader.maxDoc()
containing the value each document
- has in the given field.
- field
and returns
- an array of them in natural order, along with an array telling
- which element in the term array each document uses.
- field
to see if it contains integers, floats
- or strings, and then calls one of the other methods in this class to get the
- values. For string values, a StringIndex is returned. After
- calling this method, there is an entry in the cache for both
- type AUTO
and the actual found type.
- field
and calls the given SortComparator
- to get the sort values. A hit in the cache will happen if reader
,
- field
, and comparator
are the same (using equals()
)
- as a previous call to this method.
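A brief sketch of these cache lookups (field names are illustrative):

  int[] years = FieldCache.DEFAULT.getInts(reader, "year");          // one entry per document, size reader.maxDoc()
  FieldCache.StringIndex idx = FieldCache.DEFAULT.getStringIndex(reader, "category");
  String categoryOfDoc5 = idx.lookup[idx.order[5]];                   // the term used by document 5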
- null
.
- This is to handle the case using ParallelMultiSearcher where the
- original list contains AUTO and we don't know the actual sort
- type until the values come back. The fields can only be set once.
- This method is thread safe.
- null
. The collators
- correspond to any SortFields which were given a specific locale.
- null
.
- a
is less relevant than b
.true
if document a
should be sorted after document b
.
- null
or empty.
-
- The number of hits to retain. Must be greater than zero.
+
a
is less relevant than b
.true
if document a
should be sorted after document b
.
- SortField.SCORE
, SortField.DOC
, SortField.STRING
, SortField.INTEGER
,
- SortField.FLOAT
or SortField.CUSTOM
. It is not valid to return SortField.AUTO
.
- This is used by multisearchers to determine how to collate results from their searchers.
+ + boolean skipTo(int target) { + do { + if (!next()) + return false; + } while (target > doc()); + return true; + } ++ Some implementations are considerably more efficient than that. +
SegmentInfos
) to do its job;
+ if you implement your own MergePolicy, you'll need to put
+ it in package Lucene.Net.Index in order to use
+ these APIs.
+ null
.
+ segments_N
) associated
+ with this commit point.
+ null
.
+ the offset in the array to start storing bytes
+
+ the number of bytes to read
+
+ fieldName
should be loaded.
+ o
is equal to this. TokenStream
enumerates the sequence of tokens, either from
+ {@link Field}s of a {@link Document} or from query text.
+
+ This is an abstract class. Concrete subclasses are:
+ TokenStream
whose input is a Reader; andTokenStream
whose input is another
+ TokenStream
.TokenStream
API has been introduced with Lucene 2.9. This API
+ has moved from being {@link Token} based to {@link Attribute} based. While
+ {@link Token} still exists in 2.9 as a convenience class, the preferred way
+ to store the information of a {@link Token} is to use {@link AttributeImpl}s.
+
+ TokenStream
now extends {@link AttributeSource}, which provides
+ access to all of the token {@link Attribute}s for the TokenStream
.
+ Note that only one instance per {@link AttributeImpl} is created and reused
+ for every token. This approach reduces object creation and allows local
+ caching of references to the {@link AttributeImpl}s. See
+ {@link #IncrementToken()} for further details.
+
+ The workflow of the new TokenStream
API is as follows:
+ TokenStream
/{@link TokenFilter}s which add/get
+ attributes to/from the {@link AttributeSource}.TokenStream
TokenStream
+ , e. g. for buffering purposes (see {@link CachingTokenFilter},
+ {@link TeeSinkTokenFilter}). For this usecase
+ {@link AttributeSource#CaptureState} and {@link AttributeSource#RestoreState}
+ can be used.
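A hedged sketch of that consumer workflow with the attribute-based API (the analyzer, field name and java.io.StringReader input are placeholders; incrementToken() throws IOException):

  TokenStream ts = analyzer.tokenStream("body", new StringReader(text));
  TermAttribute termAtt = ts.addAttribute(TermAttribute.class); // get/add the attributes you need
  while (ts.incrementToken()) {                                 // consume token by token
    System.out.println(termAtt.term());
  }
  ts.end();   // end-of-stream operations, e.g. setting the final offset
  ts.close(); // release resources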
+ public Iterator<Class<? extends Attribute>> getAttributeClassesIterator()
+
+ Note that this return value is different from Java in that it enumerates over the values
+ and not the keys
+ public Iterator<AttributeImpl> getAttributeImplsIterator()
+ public <T extends Attribute> T addAttribute(Class<T>)
+ public boolean hasAttribute(Class<? extends Attribute>)
+ public <T extends Attribute> T getAttribute(Class<T>)
+
+ public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass)
+ Impl
to it.
+ TokenStream
s use the new API and
+ implement {@link #IncrementToken}. This setting can only be enabled
+ globally.
+
+ This setting only affects TokenStream
s instantiated after this
+ call. All TokenStream
s already created use the other setting.
+
+ All core {@link Analyzer}s are compatible with this setting, if you have
+ your own TokenStream
s that are also compatible, you should enable
+ this.
+
+ When enabled, tokenization may throw {@link UnsupportedOperationException}
+ s, if the whole tokenizer chain is not compatible, e.g. one of the
+ TokenStream
s does not implement the new TokenStream
API.
+
+ The default is false
, so there is the fallback to the old API
+ available.
+
+ false
+ (using the new TokenStream
API). Streams implementing the old API
+ should upgrade to use this feature.
+
+ This method can be used to perform any end-of-stream operations, such as
+ setting the final offset of a stream. The final offset of a stream might
+ differ from the offset of the last token, e.g. in case one or more whitespace characters
+ followed the last token, but a {@link WhitespaceTokenizer} was used.
+
+ TokenStream
are intended to be consumed more than once, it is
+ necessary to implement {@link #Reset()}. Note that if your TokenStream
+ caches tokens and feeds them back again after a reset, it is imperative
+ that you clone the tokens when you store them away (on the first pass) as
+ well as when you return them (on future passes after {@link #Reset()}).
+ currentOff
.
+ end()
on the
+ input TokenStream.
+ NOTE: Be sure to call super.end()
first when overriding this method.
+ int
field:
+
+ + Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value)); + field.setOmitNorms(true); + field.setOmitTermFreqAndPositions(true); + document.add(field); ++ + For optimal performance, re-use the TokenStream and Field instance + for more than one document: + +
+ NumericTokenStream stream = new NumericTokenStream(precisionStep); + Field field = new Field(name, stream); + field.setOmitNorms(true); + field.setOmitTermFreqAndPositions(true); + Document document = new Document(); + document.add(field); + + for(all documents) { + stream.setIntValue(value) + writer.addDocument(document); + } ++ + This stream is not intended to be used in analyzers; + it's more for iterating the different precisions during + indexing a specific numeric value. + + NOTE: as token streams are only consumed once + the document is added to the index, if you index more + than one numeric field, use a separate
NumericTokenStream
+ instance for each.
+
+ See {@link NumericRangeQuery} for more details on the
+ precisionStep
+ parameter as well as how numeric fields work under the hood.
+
+ NOTE: This API is experimental and
+ might change in incompatible ways in the next release.
+
+ precisionStep
+ {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized,
+ before using it, set a value using the various set???Value() methods.
+ precisionStep
. The stream is not yet initialized,
+ before using it, set a value using the various set???Value() methods.
+ precisionStep
using the given {@link AttributeSource}.
+ The stream is not yet initialized,
+ before using it, set a value using the various set???Value() methods.
+ precisionStep
using the given
+ {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
+ The stream is not yet initialized,
+ before using it, set a value using the various set???Value() methods.
+ long
value.new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value))
+ int
value.new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value))
+ double
value.new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value))
+ float
value.new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value))
+ singleMatch
occurs in
+ the input, it will be replaced with
+ replacement
.
+
+ term
.
+ Called by search code to compute term weights.
+ term
. Returns an array with these
+ document frequencies. Used to minimize number of remote calls.
+ n
+ hits for query
, applying filter
if non-null.
+
+ Called by {@link Hits}.
+
+ Applications should usually call {@link Searcher#Search(Query)} or
+ {@link Searcher#Search(Query,Filter)} instead.
+ i
.
+ Called by {@link HitCollector} implementations.
+ n
th position. The {@link Lucene.Net.Documents.FieldSelector}
+ may be used to determine what {@link Lucene.Net.Documents.Field}s to load and how they should be loaded.
+
+ NOTE: If the underlying Reader (more specifically, the underlying FieldsReader
) is closed before the lazy {@link Lucene.Net.Documents.Field} is
+ loaded an exception may be thrown. If you want the value of a lazy {@link Lucene.Net.Documents.Field} to be available after closing you must
+ explicitly load it or fetch the Document again with a new loader.
+
+
+ n
th position
+
+ The {@link Lucene.Net.Documents.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded.
+
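An illustrative sketch of such a selector (the field name and loading policy are made up):

  FieldSelector selector = new FieldSelector() {
    public FieldSelectorResult accept(String fieldName) {
      return "id".equals(fieldName) ? FieldSelectorResult.LOAD       // load "id" eagerly
                                    : FieldSelectorResult.LAZY_LOAD; // everything else on demand
    }
  };
  Document doc = searcher.doc(docId, selector);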
+ doc
scored against
+ weight
.
+
+ This is intended to be used in developing Similarity implementations,
+ and, for good performance, should not be displayed with every hit.
+ Computing an explanation is as expensive as executing the query over the
+ entire index.
+ Applications should call {@link Searcher#Explain(Query, int)}.
+ n
hits for query
, applying
+ filter
if non-null, and sorting the hits by the criteria in
+ sort
.
+
+ Applications should usually call
+ {@link Searcher#Search(Query,Filter,int,Sort)} instead.
+
+ MultiTermQueryWrapperFilter
is not designed to
+ be used by itself. Normally you subclass it to provide a Filter
+ counterpart for a {@link MultiTermQuery} subclass.
+
+ For example, {@link TermRangeFilter} and {@link PrefixFilter} extend
+ MultiTermQueryWrapperFilter
.
+ This class also provides the functionality behind
+ {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE};
+ this is why it is not abstract.
+ collator
parameter will cause every single
+ index Term in the Field referenced by lowerTerm and/or upperTerm to be
+ examined. Depending on the number of index Terms in this Field, the
+ operation could be very slow.
+
+ fieldName
matching
+ less than or equal to upperTerm
.
+ fieldName
matching
+ greater than or equal to lowerTerm
.
+ term
. minimumSimilarity
to term
.
is to match that term.
maxSize+1
, in {@link #initialize}.
+
+ // extends getSentinelObject() to return a non-null value. + PriorityQueue pq = new MyQueue(numHits); + // save the 'top' element, which is guaranteed to not be null. + MyObject pqTop = (MyObject) pq.top(); + <...> + // now in order to add a new element, which is 'better' than top (after + // you've verified it is better), it is as simple as: + pqTop.change(). + pqTop = pq.updateTop(); ++ + NOTE: if this method returns a non-null value, it will be called by + {@link #Initialize(int)} {@link #Size()} times, relying on a new object to + be returned and will not check if it's null again. Therefore you should + ensure any call to this method creates a new instance and behaves + consistently, e.g., it cannot return null if it previously returned + non-null. +
+ pq.top().change(); + pq.adjustTop(); ++ + instead of + +
+ o = pq.pop(); + o.change(); + pq.push(o); ++ +
+ pq.top().change(); + pq.updateTop(); ++ + instead of + +
+ o = pq.pop(); + o.change(); + pq.push(o); ++ +
getInts()
and makes those values
+ available as other numeric types, casting as needed.
+
+
+ WARNING: The status of the Search.Function package is experimental.
+ The APIs introduced here might change in the future and will not be
+ supported anymore in such a case.
min
has been retrieved.
- - Caution: Iterate only over the hits needed. Iterating over all - hits is generally not desirable and may be the source of - performance issues. -
-term
.
- Called by search code to compute term weights.
- term
. Returns an array with these
- document frequencies. Used to minimize number of remote calls.
- i
.
- Called by {@link HitCollector} implementations.
- query
. query
and
- filter
.
- query
sorted by
- sort
.
- query
and filter
,
- sorted by sort
.
- query
o
is equal to this. n
in the array
- used to construct this searcher.
- n
within its
- sub-index.
- + public String toString() { + return "start=" + startOffset + ",end=" + endOffset; + } ++ + This method may be overridden by subclasses. +
+ public int hashCode() { + int code = startOffset; + code = code * 31 + endOffset; + return code; + } ++ + see also {@link #equals(Object)} +
+ return reusableToken.reinit(string, startOffset, endOffset[, type]); ++
+ return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]); ++
+ return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]); ++
+ return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]); ++
+ return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]); ++
TokenStreams
can be chained, one cannot assume that the Token's
current type is correct.getDirectory
methods default to use
+ {@link SimpleFSLockFactory} for backwards compatibility.
+ The system properties
+ org.apache.lucene.store.FSDirectoryLockFactoryClass
+ and org.apache.lucene.FSDirectory.class
+ are deprecated and only used by the deprecated
+ getDirectory
methods. The system property
+ org.apache.lucene.lockDir
is ignored completely.
+ If you really want to store locks
+ elsewhere, you can create your own {@link
+ SimpleFSLockFactory} (or {@link NativeFSLockFactory},
+ etc.) passing in your preferred lock directory.
+
+ In 3.0 this class will become abstract.
+
+ true
, call {@link #Close()} method on source directory
+
+ getDirectory
+ respect this setting.
+ Integer.MAX_VALUE
.
+ + Searcher searcher = new IndexSearcher(indexReader); + final BitSet bits = new BitSet(indexReader.maxDoc()); + searcher.search(query, new Collector() { + private int docBase; + + // ignore scorer + public void setScorer(Scorer scorer) { + } + + // accept docs out of order (for a BitSet it doesn't matter) + public boolean acceptsDocsOutOfOrder() { + return true; + } + + public void collect(int doc) { + bits.set(doc + docBase); + } + + public void setNextReader(IndexReader reader, int docBase) { + this.docBase = docBase; + } + }); ++ + Not all collectors will need to rebase the docID. For + example, a collector that simply counts the total number + of hits would skip it. + + NOTE: Prior to 2.9, Lucene silently filtered + out hits with score <= 0. As of 2.9, the core Collectors + no longer do that. It's very unusual to have such hits + (a negative query boost, or function query returning + negative custom scores, could cause it to happen). If + you need that behavior, use {@link + PositiveScoresOnlyCollector}. + + NOTE: This API is experimental and might change + in incompatible ways in the next release. + +
true
if this collector does not
+ require the matching docIDs to be delivered in int sort
+ order (smallest to largest) to {@link #collect}.
+
+ Most Lucene Query implementations will visit
+ matching docIDs in order. However, some queries
+ (currently limited to certain cases of {@link
+ BooleanQuery}) can achieve faster searching if the
+ Collector
allows them to deliver the
+ docIDs out of order.
+
+ Many collectors don't mind getting docIDs out of
+ order, so it's important to return true
+ here.
+
+ o
is equal to this. app*
.
- prefix
. o
is equal to this. query
.
- - This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter - -
-fieldName
matching
- less than or equal to upperTerm
.
- fieldName
matching
- greater than or equal to lowerTerm
.
- o
is equal to this. [010 TO 120]
.
-
- lowerTerm
but less than upperTerm
.
- There must be at least one term and either term may be null,
- in which case there is no bound on that side, but if there are
- two terms, both terms must be for the same field.
- true
if the range query is inclusive o
is equal to this. ReqExclScorer
.ReqOptScorer
.field
then by index order (document
- number). The type of value in field
is determined
- automatically.
-
- field
then by
- index order (document number). The type of value in field
is
- determined automatically.
-
- field
is determined automatically.
-
- field
then by index order
- (document number).
- field
possibly in reverse,
- then by index order (document number).
- null
.
+ Name of field to sort by, cannot be
+ null
.
+ null
.
+
+ Returns a comparator for sorting hits.
+
null
.
+
+ Returns a comparator for sorting hits.
+
+ True if natural order should be reversed.
+
null
null
.
null
if no parser was specified. Sorting is using the default parser then.
+ null
.
+ t
. o
is equal to this. Scorer
for documents matching a Term
.TermScorer
. *
, which
- matches any character sequence (including the empty one), and ?
,
- which matches any single character. Note this query can be slow, as it
- needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
- a Wildcard term should not start with one of the wildcards *
or
- ?
.
+ o
is equal to this. If a
+ {@link SortComparatorSource} (deprecated) or {@link
+ FieldCache.Parser} was provided, it must properly
+ implement equals (unless a singleton is always used).
o
is equal to this. If a
+ {@link SortComparatorSource} (deprecated) or {@link
+ FieldCache.Parser} was provided, it must properly
+ implement hashCode (unless a singleton is always
+ used).
Lucene.Net.lockDir
- or java.io.tmpdir
system property
+ open()
methods, e.g. {@link
+ #Open(String, boolean)}.
+ For efficiency, in this API documents are often referred to via
+ document numbers, non-negative integers which each name a unique
+ document in the index. These document numbers are ephemeral--they may change
+ as documents are added to and deleted from an index. Clients should thus not
+ rely on a given document having the same number between sessions.
+ An IndexReader can be opened on a directory for which an IndexWriter is
+ opened already, but it cannot be used to delete documents from the index then.
+
+ NOTE: for backwards API compatibility, several methods are not listed
+ as abstract, but have no useful implementations in this base class and
+ instead always throw UnsupportedOperationException. Subclasses are
+ strongly encouraged to override these methods, but in many cases may not
+ need to.
+
+
+ NOTE: as of 2.4, it's possible to open a read-only
+ IndexReader using one of the static open methods that
+ accepts the boolean readOnly parameter. Such a reader has
+ better concurrency as it's not necessary to synchronize on
+ the isDeleted method. Currently the default for readOnly
+ is false, meaning if not specified you will get a
+ read/write IndexReader. But in 3.0 this default will
+ change to true, meaning you must explicitly specify false
+ if you want to make changes with the resulting IndexReader.
+
+ NOTE: {@link
+ IndexReader
} instances are completely thread
+ safe, meaning multiple threads can call any of its methods,
+ concurrently. If your application requires external
+ synchronization, you should not synchronize on the
+ IndexReader
instance; use your own
+ (non-Lucene) objects instead.
+ + IndexReader reader = ... + ... + IndexReader newReader = r.reopen(); + if (newReader != reader) { + ... // reader was reopened + reader.close(); + } + reader = newReader; + ... ++ + Be sure to synchronize that code so that other threads, + if present, can never use reader after it has been + closed and before it's switched to newReader. + + NOTE: If this reader is a near real-time + reader (obtained from {@link IndexWriter#GetReader()}, + reopen() will simply call writer.getReader() again for + you, though this may change in the future. + +
true
if the index is optimized; false
otherwise
+ true
if an index exists at the specified directory.
+ If the directory does not exist or if there is no index in it,
+ false
is returned.
+ true
if an index exists; false
otherwise
+ true
if an index exists at the specified directory.
+ If the directory does not exist or if there is no index in it.
+ true
if an index exists; false
otherwise
+ true
if an index exists at the specified directory.
+ If the directory does not exist or if there is no index in it.
+ true
if an index exists; false
otherwise
+ n
th
+ Document
in this index.
+
+ NOTE: for performance reasons, this method does not check if the
+ requested document is deleted, and therefore asking for a deleted document
+ may yield unspecified results. Usually this is not required, however you
+ can call {@link #IsDeleted(int)} with the requested document ID to verify
+ the document is not deleted.
+
+ n
+ th position. The {@link FieldSelector} may be used to determine
+ what {@link Lucene.Net.Documents.Field}s to load and how they should
+ be loaded. NOTE: If this Reader (more specifically, the underlying
+ FieldsReader
) is closed before the lazy
+ {@link Lucene.Net.Documents.Field} is loaded an exception may be
+ thrown. If you want the value of a lazy
+ {@link Lucene.Net.Documents.Field} to be available after closing you
+ must explicitly load it or fetch the Document again with a new loader.
+
+ NOTE: for performance reasons, this method does not check if the
+ requested document is deleted, and therefore asking for a deleted document
+ may yield unspecified results. Usually this is not required, however you
+ can call {@link #IsDeleted(int)} with the requested document ID to verify
+ the document is not deleted.
+
+ n
th position
+
+ The {@link FieldSelector} to use to determine what
+ Fields should be loaded on the Document. May be null, in which case
+ all Fields will be loaded.
+
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ t
.term
. For each document, the document number, the frequency of
+ the term in that document is also provided, for use in
+ search scoring. If term is null, then all non-deleted
+ docs are returned with freq=1.
+ Thus, this method implements the mapping:
+ term
. For each document, in addition to the document number
+ and frequency of the term in that document, a list of all of the ordinal
+ positions of the term in the document is available. Thus, this method
+ implements the mapping:
+
+ docNum
. Once a document is
+ deleted it will not appear in TermDocs or TermPositions enumerations.
+ Attempts to read its field with the {@link #document}
+ method will result in an error. The presence of this document may still be
+ reflected in the {@link #docFreq} statistic, though
+ this will be corrected eventually as the index is further modified.
+
+ write.lock
could not
+ be obtained)
+ docNum
.
+ Applications should call {@link #DeleteDocument(int)} or {@link #DeleteDocuments(Term)}.
+ term
indexed.
+ This is useful if one uses a document field to hold a unique ID string for
+ the document. Then to delete such a document, one merely constructs a
+ term with the appropriate field and the unique ID string as its text and
+ passes it to this method.
+ See {@link #DeleteDocument(int)} for information about when this deletion will
+ become effective.
+
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ true
iff the index in the named directory is
+ currently locked.
+ true
iff the index in the named directory is
+ currently locked.
+ segments_N
). This point in time, when the
+ action of writing of a new segments file to the directory
+ is completed, is an index commit.
+
+ Each index commit point has a unique segments file
+ associated with it. The segments file associated with a
+ later index commit point would have a larger N.
+
+ WARNING: This API is a new and experimental and
+ may suddenly change.
+ segments_N
) associated
+ with this commit point.
+ dir
or name
is null file
is the string by which the
+ sub-stream will be known in the compound stream.
+
+ file
is null Lock
holding the lock
+ stored
attribute instead.
+
+ + class MyAnalyzer extends Analyzer { + public final TokenStream tokenStream(String fieldName, Reader reader) { + return new PorterStemFilter(new LowerCaseTokenizer(reader)); + } + } +
RAMDirectory
instance from the {@link FSDirectory}.
+ File
specifying the index directory
+ offset as seen in the output
+ RAMDirectory
instance from the {@link FSDirectory}.
+ String
specifying the full index directory path
+ current offset
+ "new york"
.
+
+ This query may be combined with other terms or queries with a {@link BooleanQuery}.
+ WITHIN
or NEAR
operator.
+ The slop is in fact an edit-distance, where the units correspond to
+ moves of terms in the query phrase out of position. For example, to switch
+ the order of two words requires two moves (the first move places the words
+ atop one another), so to permit re-orderings of phrases, the slop must be
+ at least two.
+ More exact matches are scored higher than sloppier matches, thus search
+ results are sorted by exactness.
+ The slop is zero by default, requiring exact matches.
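A hedged sketch of such a sloppy phrase (the field name is illustrative):

  PhraseQuery pq = new PhraseQuery();
  pq.add(new Term("contents", "new"));
  pq.add(new Term("contents", "york"));
  pq.setSlop(1); // allows one unit of edit distance; 0 (the default) would require an exact phrase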
+ o
is equal to this. IndexSearcher
} instances are completely
+ thread safe, meaning multiple threads can call any of its
+ methods, concurrently. If your application requires
+ external synchronization, you should not
+ synchronize on the IndexSearcher
instance;
+ use your own (non-Lucene) objects instead.
+ query
. query
and
+ filter
.
+ query
sorted by
+ sort
.
+ query
and filter
,
+ sorted by sort
.
+ n
hits for query
, applying
+ filter
if non-null, and sorting the hits by the criteria in
+ sort
.
+
+ NOTE: this does not compute scores by default; use
+ {@link IndexSearcher#setDefaultFieldSortScoring} to enable scoring.
+
+ score
passed to this method is a raw score.
+ In other words, the score will not necessarily be a float whose value is
+ between 0 and 1.
+ score
passed to this method is a raw score.
+ In other words, the score will not necessarily be a float whose value is
+ between 0 and 1.
+ n
+ hits for query
, applying filter
if non-null.
+
+ n
+ hits for query
.
+
+ doc
scored against
+ query
.
+
+ This is intended to be used in developing Similarity implementations,
+ and, for good performance, should not be displayed with every hit.
+ Computing an explanation is as expensive as executing the query over the
+ entire index.
+ query
lowerTerm
+ but less/equal than upperTerm
.
+
+
+ If an endpoint is null, it is said
+ to be "open". Either or both endpoints may be open. Open endpoints may not
+ be exclusive (you can't select all but the first or last term without
+ explicitly specifying the term to exclude.)
+
+ lowerTerm
is
+ included in the range.
+
+ If true, the upperTerm
is
+ included in the range.
+
lowerTerm
but less/equal than upperTerm
.
+
+ If an endpoint is null, it is said
+ to be "open". Either or both endpoints may be open. Open endpoints may not
+ be exclusive (you can't select all but the first or last term without
+ explicitly specifying the term to exclude.)
+
+ If collator
is not null, it will be used to decide whether
+ index terms are within the given range, rather than using the Unicode code
+ point order in which index terms are stored.
+
+ WARNING: Using this constructor and supplying a non-null
+ value in the collator
parameter will cause every single
+ index Term in the Field referenced by lowerTerm and/or upperTerm to be
+ examined. Depending on the number of index Terms in this Field, the
+ operation could be very slow.
+
+ lowerTerm
is
+ included in the range.
+
+ If true, the upperTerm
is
+ included in the range.
+
+ The collator to use to collate index Terms, to determine
+ their membership in the range bounded by lowerTerm
and
+ upperTerm
.
+
+ true
if the lower endpoint is inclusive true
if the upper endpoint is inclusive o
is equal to this. MUST
+ clauses one or more SHOULD
clauses must match a document
+ for the BooleanQuery to match.
+ MUST_NOT
clause.
+ MultipleTermPositions
instance.
+
+ document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.NOT_ANALYZED));
+
+
+ Integer.MIN_VALUE
and Integer.MAX_VALUE
inclusive.
+ Documents which should appear first in the sort
+ should have low value integers, later documents high values
+ (i.e. the documents should be numbered 1..n
where
+ 1
is the first and n
the last).
+
+ Long term values should contain only digits and an optional
+ preceding negative sign. Values must be base 10 and in the range
+ Long.MIN_VALUE
and Long.MAX_VALUE
inclusive.
+ Documents which should appear first in the sort
+ should have low value integers, later documents high values.
+
+ Float term values should conform to values accepted by
+ {@link Float Float.valueOf(String)} (except that NaN
+ and Infinity
are not supported).
+ Documents which should appear first in the sort
+ should have low values, later documents high values.
+
+ String term values can contain any valid String, but should
+ not be tokenized. The values are sorted according to their
+ {@link Comparable natural order}. Note that using this type
+ of term value has higher memory requirements than the other
+ two types.
+
+ IndexReader.maxDoc()
for each field
+ name for which a sort is performed. In other words, the size of the
+ cache in bytes is:
+
+ 4 * IndexReader.maxDoc() * (# of different fields actually used to sort)
+
+ For String fields, the cache is larger: in addition to the
+ above array, the value of every term in the field is kept in memory.
+ If there are many unique terms in the field, this could
+ be quite large.
+
+ Note that the size of the cache is not affected by how many
+ fields are in the index and might be used to sort - only by
+ the ones actually used to sort a result set.
+
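As a hedged sketch, sorting a search by the integer field indexed in the example above, with relevance as a tiebreaker (searcher and query are placeholders):

  Sort sort = new Sort(new SortField[] {
    new SortField("byNumber", SortField.INT), // low values sort first, as described above
    SortField.FIELD_SCORE                     // then by relevance
  });
  TopDocs top = searcher.search(query, null, 10, sort);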
+ Created: Feb 12, 2004 10:53:57 AM
+
+ field
then by index order (document
+ number). The type of value in field
is determined
+ automatically.
+
+ field
then by
+ index order (document number). The type of value in field
is
+ determined automatically.
+
+ field
is determined automatically.
+
+ field
then by index order
+ (document number).
+ field
possibly in reverse,
+ then by index order (document number).
+ o
is equal to this. getBytes()
and makes those values
+ available as other numeric types, casting as needed.
+
+
+ WARNING: The status of the Search.Function package is experimental.
+ The APIs introduced here might change in the future and will not be
+ supported anymore in such a case.
+
+ bit
to one. bit
to true, and
+ returns true if bit was already set
+ bit
to zero. name
in Directory
d
, as written by the {@link #write} method.
results
is null it means there are no results to return,
+ either because there were 0 calls to collect() or because the arguments to
+ topDocs were invalid.
+ start
, you should call {@link #TopDocs()} and work
+ with the returned {@link TopDocs} object, which will contain all the
+ results this search execution collected.
+ numHits
.
+
+ trackDocScores
to true as well.
+
+ specifies whether documents are scored in doc Id order or not by
+ the given {@link Scorer} in {@link #SetScorer(Scorer)}.
+
+ new Integer(termtext)
. Note that this
+ might not always be the most efficient implementation - for this
+ particular example, a better implementation might be to make a
+ ScoreDocLookupComparator that uses an internal lookup table of int.
+ termtext
that sorts according to the natural order of termtext
.
+ i
should come before j
i
should come after j
0
if they are equal
+ SortField.SCORE
,
+ SortField.DOC
, SortField.STRING
,
+ SortField.INTEGER
, SortField.FLOAT
or
+ SortField.CUSTOM
. It is not valid to return
+ SortField.AUTO
.
+ This is used by multisearchers to determine how to collate results
+ from their searchers.
+ n
+ > current_{@link #Length()} (but n
< {@link #Length()}
+ _at_start).
+
+ + TopDocs topDocs = searcher.Search(query, numHits); + ScoreDoc[] hits = topDocs.scoreDocs; + for (int i = 0; i < hits.Length; i++) { + int docId = hits[i].doc; + Document d = searcher.Doc(docId); + // do something with current hit + ... ++
min
has been retrieved.
+ FieldCache.DEFAULT
for maintaining internal term lookup tables.
+
+ Created: Dec 8, 2003 12:56:03 PM
+
+ null
or empty.
+
+ The number of hits to retain. Must be greater than zero.
+
+ a
is less relevant than b
.true
if document a
should be sorted after document b
.
+ field
as a single byte and returns an array
+ of size reader.maxDoc()
of the value each document
+ has in the given field.
+ field
as bytes and returns an array of
+ size reader.maxDoc()
of the value each document has in the
+ given field.
+ field
as shorts and returns an array
+ of size reader.maxDoc()
of the value each document
+ has in the given field.
+ field
as shorts and returns an array of
+ size reader.maxDoc()
of the value each document has in the
+ given field.
+ field
as integers and returns an array
+ of size reader.maxDoc()
of the value each document
+ has in the given field.
+ field
as integers and returns an array of
+ size reader.maxDoc()
of the value each document has in the
+ given field.
+ field
as floats and returns an array
+ of size reader.maxDoc()
of the value each document
+ has in the given field.
+ field
as floats and returns an array
+ of size reader.maxDoc()
of the value each document
+ has in the given field.
+ field
as longs and returns an array
+ of size reader.maxDoc()
of the value each document
+ has in the given field.
+
+ field
as longs and returns an array of
+ size reader.maxDoc()
of the value each document has in the
+ given field.
+
+ field
as integers and returns an array
+ of size reader.maxDoc()
of the value each document
+ has in the given field.
+
+ field
as doubles and returns an array of
+ size reader.maxDoc()
of the value each document has in the
+ given field.
+
+ field
and returns an array
+ of size reader.maxDoc()
containing the value each document
+ has in the given field.
+ field
and returns
+ an array of them in natural order, along with an array telling
+ which element in the term array each document uses.
+ field
to see if it contains integers, longs, floats
+ or strings, and then calls one of the other methods in this class to get the
+ values. For string values, a StringIndex is returned. After
+ calling this method, there is an entry in the cache for both
+ type AUTO
and the actual found type.
+ field
and calls the given SortComparator
+ to get the sort values. A hit in the cache will happen if reader
,
+ field
, and comparator
are the same (using equals()
)
+ as a previous call to this method.
+ NOT_ANALYZED
field to ensure that
+ there is only a single term.
+
+ This class does not have a constructor; use one of the static factory methods available,
+ that create a correct instance for different data types supported by {@link FieldCache}.
+ null
.
+ null
.
+ null
.
+ null
.
+ null
.
+ null
.
+ null
.
+ null
.
+ null
.
+ null
.
+ null
.
+ null
.
+ null
.
+ ConjunctionScorer
.
+ This Scorer implements {@link Scorer#SkipTo(int)} and uses skipTo() on the given Scorers.
+ TODO: Implement score(HitCollector, int).
+ currentSumScore
is the total score of the current matching doc,
+ nrMatchers
is the number of matching scorers,
+ and all scorers are after the matching doc, or are exhausted.
+ DisjunctionScorer
.minimumNrMatchers
is bigger than
+ the number of subScorers
,
+ no matches will be produced.
+ ConjunctionScorer
.
+
+ DisjunctionScorer
, using one as the minimum number
+ of matching subscorers.
+ scorerDocQueue
.
+ scorerDocQueue
.
+ Repeat until at least the minimum number of subscorers match on the same
+ document and all subscorers are after that document or are exhausted.
+ scorerDocQueue
has at least minimumNrMatchers
+ available. At least the scorer with the minimum document number will be advanced.
+ currentDoc
, currentSumScore
,
+ and nrMatchers
describe the match.
+
+ TODO: Investigate whether it is possible to use skipTo() when
+ the minimum number of matchers is bigger than one, ie. try and use the
+ character of ConjunctionScorer for the minimum number of matchers.
+ Also delay calling score() on the sub scorers until the minimum number of
+ matchers is reached.
+ +
) or a minus (-
) sign, indicating
+ that the clause is required or prohibited respectively; or+
/-
prefix to require any of a set of
+ terms.+ Query ::= ( Clause )* + Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) ++ + + Examples of appropriately formatted queries can be found in the query syntax + documentation. + + + + In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g. + date:[6/1/2005 TO 6/4/2005] produces a range query that searches + for "date" fields between 2005-06-01 and 2005-06-04. Note that the format + of the accepted input depends on {@link #SetLocale(Locale) the locale}. + By default a date is converted into a search term using the deprecated + {@link DateField} for compatibility reasons. + To use the new {@link DateTools} to convert dates, a + {@link Lucene.Net.Documents.DateTools.Resolution} has to be set. + + + The date resolution that shall be used for RangeQueries can be set + using {@link #SetDateResolution(DateTools.Resolution)} + or {@link #SetDateResolution(String, DateTools.Resolution)}. The former + sets the default date resolution for all fields, whereas the latter can + be used to set field specific date resolutions. Field specific date + resolutions take, if set, precedence over the default date resolution. + + + If you use neither {@link DateField} nor {@link DateTools} in your + index, you can create your own + query parser that inherits QueryParser and overwrites + {@link #GetRangeQuery(String, String, String, boolean)} to + use a different method for date conversion. + + + Note that QueryParser is not thread-safe. + + NOTE: there is a new QueryParser in contrib, which matches + the same syntax as this class, but is more modular, + enabling substantial customization to how a query is created. + + NOTE: there is a new QueryParser in contrib, which matches + the same syntax as this class, but is more modular, + enabling substantial customization to how a query is created. + NOTE: You must specify the required {@link Version} compatibility when + creating QueryParser: +
true
to allow leading wildcard characters.
+
+ When set, *
or ?
are allowed as
+ the first character of a PrefixQuery and WildcardQuery.
+ Note that this can produce very slow
+ queries on big indexes.
+
+ Default: false.
+ true
to enable position increments in result query.
+
+ When set, result phrase and multi-phrase queries will
+ be aware of position increments.
+ Useful when e.g. a StopFilter increases the position increment of
+ the token that follows an omitted token.
+
+ Default: false.
+ OR_OPERATOR
) terms without any modifiers
+ are considered optional: for example capital of Hungary
is equal to
+ capital OR of OR Hungary
.AND_OPERATOR
mode terms are considered to be in conjunction: the
+ above mentioned query is parsed as capital AND of AND Hungary
+ true
.
+ \\u0041
to A
.
+
+ \
.
+ java Lucene.Net.QueryParsers.QueryParser <input>
+ title
and body
):
+
+
+
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ When you pass a boost (title=>5 body=>10) you can get
+
+
+
+ +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+
+ title
and body
):
+
+
+
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ When you pass a boost (title=>5 body=>10) you can get
+
+
+
+ +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+ title
and body
):
+
+
+
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+
+ title
and body
):
+
+
+
+ (title:term1 body:term1) (title:term2 body:term2)
+
+
+
+ When setDefaultOperator(AND_OPERATOR) is set, the result will be:
+
+
+
+ +(title:term1 body:term1) +(title:term2 body:term2)
+
+
+
+ In other words, all the query's terms must appear, but it doesn't matter
+ in what fields they appear.
+
+ + <code> + (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx) + </code> ++ +
+ <code> + (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx) + </code> ++ +
+ Usage:
+
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse("query", fields, flags, analyzer);
+
+
+
+ The code above would construct a query:
+
+
+ (filename:query) +(contents:query) -(description:query)
+
+
+
+ + Usage: + <code> + String[] fields = {"filename", "contents", "description"}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + BooleanClause.Occur.MUST, + BooleanClause.Occur.MUST_NOT}; + MultiFieldQueryParser.parse("query", fields, flags, analyzer); + </code> ++ + The code above would construct a query: + +
+ <code> + (filename:query) +(contents:query) -(description:query) + </code> ++ +
+ Usage:
+
+ String[] query = {"query1", "query2", "query3"};
+ String[] fields = {"filename", "contents", "description"};
+ BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
+ BooleanClause.Occur.MUST,
+ BooleanClause.Occur.MUST_NOT};
+ MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+
+
+
+ The code above would construct a query:
+
+
+ (filename:query1) +(contents:query2) -(description:query3)
+
+
+
+ + Usage: + <code> + String[] query = {"query1", "query2", "query3"}; + String[] fields = {"filename", "contents", "description"}; + BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + BooleanClause.Occur.MUST, + BooleanClause.Occur.MUST_NOT}; + MultiFieldQueryParser.parse(query, fields, flags, analyzer); + </code> ++ + The code above would construct a query: + +
+ <code> + (filename:query1) +(contents:query2) -(description:query3) + </code> ++ +
autoCommit = true
then
+ this method will in general be called many times during
+ one instance of {@link IndexWriter}. If
+ autoCommit = false
then this method is
+ only called once when {@link IndexWriter#close} is
+ called, or not at all if the {@link IndexWriter#abort}
+ is called.
+
+ Note: the last CommitPoint is the most recent one,
+ i.e. the "front index state". Be careful not to delete it,
+ unless you know for sure what you are doing, and unless
+ you can afford to lose the index content while doing that.
+
+ true
, if the workaround
+ can be enabled (with no guarantees).
+ true
, if this platform supports unmapping mmaped files.IndexInput
+ is closed while another thread is still accessing it (SIGSEGV).
+ false
and the workaround cannot be enabled.
+ true
, if the unmap workaround is enabled.+ Searcher searcher = new IndexSearcher(indexReader); + final BitSet bits = new BitSet(indexReader.maxDoc()); + searcher.search(query, new HitCollector() { + public void collect(int doc, float score) { + bits.set(doc); + } + }); ++ + Note: This is called in an inner search loop. For good search + performance, implementations of this method should not call + {@link Searcher#Doc(int)} or + {@link Lucene.Net.Index.IndexReader#Document(int)} on every + document number encountered. Doing so can slow searches by an order + of magnitude or more. + Note: The
score
passed to this method is a raw score.
+ In other words, the score will not necessarily be a float whose value is
+ between 0 and 1.
+ 
+ teacherid: 1
+ studentfirstname: james
+ studentsurname: jones
+ 
+ teacherid: 2
+ studentfirstname: james
+ studentsurname: smith
+ studentfirstname: sally
+ studentsurname: jones
+ 
+ a SpanNearQuery with a slop of 0 can be applied across two
+ {@link SpanTermQuery} objects as follows:
+ 
+ SpanQuery q1  = new SpanTermQuery(new Term("studentfirstname", "james"));
+ SpanQuery q2  = new SpanTermQuery(new Term("studentsurname", "jones"));
+ SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname");
+ Query q = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false);
+ 
+ to search for 'studentfirstname:james studentsurname:jones' and find
+ teacherid 1 without matching teacherid 2 (which has a 'james' in position 0
+ and 'jones' in position 1).
+ 
+ Note: as {@link #GetField()} returns the masked field, scoring will be
+ done using the norms of the field name supplied. This may lead to unexpected
+ scoring behaviour.
size
elements. If
+ prePopulate
is set to true, the queue will pre-populate itself
+ with sentinel objects and set its {@link #Size()} to size
. In
+ that case, you should not rely on {@link #Size()} to get the number of
+ actual elements that were added to the queue, but keep track yourself.prePopulate
is true, you should pop
+ elements from the queue using the following code example:
+
+ 
+ PriorityQueue pq = new HitQueue(10, true); // pre-populate.
+ ScoreDoc top = pq.top();
+ 
+ // Add/Update one element.
+ top.score = 1.0f;
+ top.doc = 0;
+ top = (ScoreDoc) pq.updateTop();
+ int totalHits = 1;
+ 
+ // Now pop only the elements that were *truly* inserted.
+ // First, pop all the sentinel elements (there are pq.size() - totalHits).
+ for (int i = pq.size() - totalHits; i > 0; i--) pq.pop();
+ 
+ // Now pop the truly added elements.
+ ScoreDoc[] results = new ScoreDoc[totalHits];
+ for (int i = totalHits - 1; i >= 0; i--) {
+   results[i] = (ScoreDoc) pq.pop();
+ }
+ 
+ NOTE: This class pre-allocates a full array of
+ length
size
.
+
+ positionIncrement == 0
.+ TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1)); + TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream(); + TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream(); + TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2)); + source2.addSinkTokenStream(sink1); + source2.addSinkTokenStream(sink2); + TokenStream final1 = new LowerCaseFilter(source1); + TokenStream final2 = source2; + TokenStream final3 = new EntityDetect(sink1); + TokenStream final4 = new URLDetect(sink2); + d.add(new Field("f1", final1)); + d.add(new Field("f2", final2)); + d.add(new Field("f3", final3)); + d.add(new Field("f4", final4)); ++ In this example,
sink1
and sink2
will both get tokens from both
+ reader1
and reader2
after whitespace tokenizer
+ and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
+ It is important that tees are consumed before sinks: in the above example, the fields that read
+ from the tees (f1, f2) must be added to the document before the fields that read from the sinks
+ (f3, f4). If you are not sure which stream is consumed first, you can simply add another sink and
+ then pass all tokens to the sinks at once using {@link #consumeAllTokens}; this TokenFilter is
+ exhausted afterwards. To do that, change the example above to:
+ + ... + TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream()); + TokenStream final2 = source2.newSinkTokenStream(); + sink1.consumeAllTokens(); + sink2.consumeAllTokens(); + ... ++ In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready. + Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene. +
TeeSinkTokenFilter
+ to this one. The supplied stream will also receive all consumed tokens.
+ This method can be used to pass tokens from two different tees to one sink.
+ TeeSinkTokenFilter
passes all tokens to the added sinks
+ when it is itself consumed. To be sure that all tokens from the input
+ stream are passed to the sinks, you can call this method.
+ This instance is exhausted afterwards, but all sinks are immediately available.
+ input
to a newly created JFlex scanner.
+ input
to the newly created JFlex scanner.
+
+ input
to the newly created JFlex scanner.
+
+ RAMDirectory
instance from a different
+ Directory
implementation. This can be used to load
+ a disk-based index into memory.
+
+ This should be used only with indices that can fit into memory.
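+ 
+ For example, a minimal sketch (the on-disk path is illustrative) of copying a disk-based
+ index into memory and searching it:
+ 
+ Directory diskDir = FSDirectory.getDirectory("/path/to/index");
+ Directory ramDir = new RAMDirectory(diskDir); // complete, independent in-memory copy
+ IndexSearcher searcher = new IndexSearcher(ramDir);
+ 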
+
+ Note that the resulting RAMDirectory
instance is fully
+ independent from the original Directory
(it is a
+ complete copy). Any subsequent changes to the
+ original Directory
will not be visible in the
+ RAMDirectory
instance.
+
+ Directory
value
+
+ RAMDirectory
instance from the {@link FSDirectory}.
+
+ File
specifying the index directory
+
+
+ RAMDirectory
instance from the {@link FSDirectory}.
+
+ String
specifying the full index directory path
+
+
+ + boolean skipTo(int target) { + do { + if (!next()) + return false; + } while (target > doc()); + return true; + } ++ Most implementations are considerably more efficient than that. +
byte[]
payloads
+ + Filter f = NumericRangeFilter.newFloatRange("weight", + new Float(0.3f), new Float(0.10f), + true, true); ++ + accepts all documents whose float valued "weight" field + ranges from 0.3 to 0.10, inclusive. + See {@link NumericRangeQuery} for details on how Lucene + indexes and searches numeric valued fields. + + NOTE: This API is experimental and + might change in incompatible ways in the next + release. + +
NumericRangeFilter
, that filters a long
+ range using the given precisionStep
.
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeFilter
, that filters a long
+ range using the default precisionStep
{@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeFilter
, that filters an int
+ range using the given precisionStep
.
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeFilter
, that filters an int
+ range using the default precisionStep
{@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeFilter
, that filters a double
+ range using the given precisionStep
.
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeFilter
, that filters a double
+ range using the default precisionStep
{@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeFilter
, that filters a float
+ range using the given precisionStep
.
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeFilter
, that filters a float
+ range using the default precisionStep
{@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ true
if the lower endpoint is inclusive true
if the upper endpoint is inclusive fields
object array
+ will have three elements, corresponding respectively to
+ the term values for the document in fields "a", "b" and "c".
+ The class of each element in the array will be either
+ Integer, Float or String depending on the type of values
+ in the terms of each field.
+
+ Created: Feb 11, 2004 1:23:38 PM
+
+ stored
attribute instead.
+
+ query
.
+ app*
.
+
+ This query uses the {@link
+ MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ rewrite method.
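+ 
+ For example, a minimal sketch (the field name "title" is illustrative) matching all terms
+ that start with "app":
+ 
+ Query q = new PrefixQuery(new Term("title", "app"));
+ 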
+ prefix
. FieldCache.DEFAULT
for maintaining
+ internal term lookup tables.
+
+ This class will not resolve SortField.AUTO types, and expects the type
+ of all SortFields used for construction to already have been resolved.
+ {@link SortField#DetectFieldType(IndexReader, String)} is a utility method which
+ may be used for field type detection.
+
+ NOTE: This API is experimental and might change in
+ incompatible ways in the next release.
+
+ numHits
.
+
+ null
or empty
+
+ The number of hits to retain. Must be greater than zero.
+
+ a
is less relevant than b
.true
if document a
should be sorted after document b
.
+ .f
+ a number and
+ from .s
+ a number. Also note that
+ Lucene's segments_N
files do not have any
+ filename extension.
+ FilterIndexReader
contains another IndexReader, which it
+ uses as its basic source of data, possibly transforming the data along the
+ way or providing additional functionality. The class
+ FilterIndexReader
itself simply implements all abstract methods
+ of IndexReader
with versions that pass all requests to the
+ contained index reader. Subclasses of FilterIndexReader
may
+ further override some of these methods and may also provide additional
+ methods and fields.
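+ 
+ A minimal sketch of such a subclass (the class name and the logging behaviour are
+ illustrative only):
+ 
+ public class LoggingIndexReader extends FilterIndexReader {
+   public LoggingIndexReader(IndexReader in) { super(in); }
+   public Document document(int n) throws IOException {
+     System.out.println("loading doc " + n); // extra behaviour layered on the wrapped reader
+     return super.document(n);
+   }
+ }
+ 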
+ + public boolean skipTo(Term target) { + do { + if (!next()) + return false; + } while (target > term()); + return true; + } ++ Some implementations *could* be considerably more efficient than a linear scan. + Check the implementation to be sure. +
+ PerFieldAnalyzerWrapper aWrapper = + new PerFieldAnalyzerWrapper(new StandardAnalyzer()); + aWrapper.addAnalyzer("firstname", new KeywordAnalyzer()); + aWrapper.addAnalyzer("lastname", new KeywordAnalyzer()); ++ + In this example, StandardAnalyzer will be used for all fields except "firstname" + and "lastname", for which KeywordAnalyzer will be used. + + A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing + and query parsing. +
n
in the
+ array used to construct this searcher/reader.
+ q
for document d
correlates to the
+ cosine-distance or dot-product between document and query vectors in a
+
+ Vector Space Model (VSM) of Information Retrieval.
+ A document whose vector is closer to the query vector in that model is scored higher.
+
+ The score is computed as follows:
+ 
+ score(q,d) = coord(q,d) · queryNorm(q) · ∑ (t in q) ( tf(t in d) · idf(t)² · t.getBoost() · norm(t,d) )
+ 
+ where
+ 
+ {@link Lucene.Net.Search.DefaultSimilarity#Tf(float) tf(t in d)} = frequency½
+ 
+ {@link Lucene.Net.Search.DefaultSimilarity#Idf(int, int) idf(t)} = 1 + log ( numDocs / (docFreq + 1) )
+ 
+ queryNorm(q) = {@link Lucene.Net.Search.DefaultSimilarity#QueryNorm(float) queryNorm(sumOfSquaredWeights)} = 1 / sumOfSquaredWeights½
+ 
+ {@link Lucene.Net.Search.Weight#SumOfSquaredWeights() sumOfSquaredWeights} =
+ {@link Lucene.Net.Search.Query#GetBoost() q.getBoost()}² · ∑ (t in q) ( idf(t) · t.getBoost() )²
+ 
+ norm(t,d) = {@link Lucene.Net.Documents.Document#GetBoost() doc.getBoost()} · {@link #LengthNorm(String, int) lengthNorm(field)}
+ · ∏ (field f in d named as t) {@link Lucene.Net.Documents.Fieldable#GetBoost() f.getBoost()}
numTokens
is large,
+ and larger values when numTokens
is small.
+
+ Note that the return values are computed under
+ {@link Lucene.Net.Index.IndexWriter#AddDocument(Lucene.Net.Documents.Document)}
+ and then stored using
+ {@link #EncodeNorm(float)}.
+ Thus they have limited precision, and documents
+ must be re-indexed if this method is altered.
+
+ freq
is large, and smaller values when freq
+ is small.
+
+ The default implementation calls {@link #Tf(float)}.
+
+ freq
is large, and smaller values when freq
+ is small.
+
+ + return idf(searcher.docFreq(term), searcher.maxDoc()); ++ + Note that {@link Searcher#MaxDoc()} is used instead of + {@link Lucene.Net.Index.IndexReader#NumDocs()} because it is proportional to + {@link Searcher#DocFreq(Term)} , i.e., when one is inaccurate, + so is the other, and in the same direction. + +
+ idf(searcher.docFreq(term), searcher.maxDoc()); ++ + Note that {@link Searcher#MaxDoc()} is used instead of + {@link Lucene.Net.Index.IndexReader#NumDocs()} because it is + proportional to {@link Searcher#DocFreq(Term)} , i.e., when one is + inaccurate, so is the other, and in the same direction. + +
Scorer
implements {@link Scorer#SkipTo(int)},
+ and it uses the skipTo() on the given scorers.
+ ReqExclScorer
.position
as location - offset
, so that a
+ matching exact phrase is easily identified when all PhrasePositions
+ have exactly the same position
.
+ o
is equal to this. fieldsToLoad
and lazyFieldsToLoad
, lazy has precedence.
+
+ value
should be stored in the index
+
+ Whether the field should be indexed, and if so, if it should
+ be tokenized before indexing
+
+ null
value
should be stored in the index
+
+ Whether the field should be indexed, and if so, if it should
+ be tokenized before indexing
+
+ Whether term vector should be stored
+
+ null
TermVector.YES
value
should be stored in the index
+
+ Whether the field should be indexed, and if so, if it should
+ be tokenized before indexing
+
+ Whether term vector should be stored
+
+ null
TermVector.YES
null
null
null
null
value
should be stored (compressed or not)
+
+ Store.NO
value
should be stored (compressed or not)
+
+ Store.NO
+ SinkTokenizer sink1 = new SinkTokenizer(); + SinkTokenizer sink2 = new SinkTokenizer(); + TokenStream source1 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader1), sink1), sink2); + TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader2), sink1), sink2); + TokenStream final1 = new LowerCaseFilter(source1); + TokenStream final2 = source2; + TokenStream final3 = new EntityDetect(sink1); + TokenStream final4 = new URLDetect(sink2); + d.add(new Field("f1", final1)); + d.add(new Field("f2", final2)); + d.add(new Field("f3", final3)); + d.add(new Field("f4", final4)); ++ In this example,
sink1
and sink2
will both get tokens from both
+ reader1
and reader2
after whitespace tokenizer
+ and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
+ It is important that tees are consumed before sinks: in the above example, the fields that read
+ from the tees (f1, f2) must be added to the document before the fields that read from the sinks (f3, f4).
+ Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene
+
+
+ See LUCENE-1058.
+
+ WARNING: {@link TeeTokenFilter} and {@link SinkTokenizer} only work with the old TokenStream API.
+ If you switch to the new API, you need to use {@link TeeSinkTokenFilter} instead, which offers
+ the same functionality.
+ stopWords
is an instance of {@link CharArraySet} (true if
+ makeStopSet()
was used to construct the set) it will be directly used
+ and ignoreCase
will be ignored since CharArraySet
+ directly controls case sensitivity.
+
+ If stopWords
is not an instance of {@link CharArraySet},
+ a new CharArraySet will be constructed and ignoreCase
will be
+ used to specify the case sensitivity of that set.
+
+ stopWords
is an instance of {@link CharArraySet} (true if
+ makeStopSet()
was used to construct the set) it will be directly used
+ and ignoreCase
will be ignored since CharArraySet
+ directly controls case sensitivity.
+
+ If stopWords
is not an instance of {@link CharArraySet},
+ a new CharArraySet will be constructed and ignoreCase
will be
+ used to specify the case sensitivity of that set.
+
+ true
, this StopFilter will preserve
+ positions of the incoming tokens (ie, accumulate and
+ set position increments of the removed stop tokens).
+ Generally, true
is best as it does not
+ lose information (positions of the original tokens)
+ during indexing.
+
+ When set, when a token is stopped
+ (omitted), the position increment of the following
+ token is incremented.
+
+ NOTE: be sure to also
+ set {@link QueryParser#setEnablePositionIncrements} if
+ you use QueryParser to create queries.
+ len
chars of text
starting at off
+ are in the set
+ System.String
is in the set null
.
+ nextCharArray
for more efficient access.
+ + { pq.top().change(); pq.adjustTop(); } +instead of
+ { o = pq.pop(); o.change(); pq.push(o); } ++
OpenBitSet
is faster than java.util.BitSet
in most operations
+ and *much* faster at calculating cardinality of sets and results of set operations.
+ It can also handle sets of larger cardinality (up to 64 * 2**32-1)
+
+ The goals of OpenBitSet
are the fastest implementation possible, and
+ maximum code reuse. Extra safety and encapsulation
+ may always be built on top, but if that's built in, the cost can never be removed (and
+ hence people re-implement their own version in order to get better performance).
+ If you want a "safe", totally encapsulated (and slower and limited) BitSet
+ class, use java.util.BitSet
.
+
+ cardinality | intersect_count | union | nextSetBit | get | iterator | +|
---|---|---|---|---|---|---|
50% full | 3.36 | 3.96 | 1.44 | 1.46 | 1.99 | 1.58 | +
1% full | 3.31 | 3.90 | 1.04 | 0.99 | +
cardinality | intersect_count | union | nextSetBit | get | iterator | +|
---|---|---|---|---|---|---|
50% full | 2.50 | 3.50 | 1.00 | 1.03 | 1.12 | 1.25 | +
1% full | 2.51 | 3.49 | 1.00 | 1.02 | +
lowerTerm
+ but less/equal than upperTerm
.
+
+ If an endpoint is null, it is said to be "open". Either or both
+ endpoints may be open. Open endpoints may not be exclusive
+ (you can't select all but the first or last term without
+ explicitly specifying the term to exclude.)
+
+ lowerTerm
is included in the range.
+
+ If true, the upperTerm
is included in the range.
+
+ The collator to use to collate index Terms, to determine their
+ membership in the range bounded by lowerTerm
and
+ upperTerm
.
+
+
+ + Query q = NumericRangeQuery.newFloatRange("weight", + new Float(0.3f), new Float(0.10f), + true, true); ++ + matches all documents whose float valued "weight" field + ranges from 0.3 to 0.10, inclusive. + + The performance of NumericRangeQuery is much better + than the corresponding {@link TermRangeQuery} because the + number of terms that must be searched is usually far + fewer, thanks to trie indexing, described below. + + You can optionally specify a
precisionStep
+ when creating this query. This is necessary if you've
+ changed this configuration from its default (4) during
+ indexing. Lower values consume more disk space but speed
+ up searching. Suitable values are between 1 and
+ 8. A good starting point to test is 4,
+ which is the default value for all Numeric*
+ classes. See below for
+ details.
+
+ This query defaults to {@linkplain
+ MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} for
+ 32 bit (int/float) ranges with precisionStep <8 and 64
+ bit (long/double) ranges with precisionStep <6.
+ Otherwise it uses {@linkplain
+ MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} as the
+ number of terms is likely to be high. With precision
+ steps of <4, this query can be run with one of the
+ BooleanQuery rewrite methods without changing
+ BooleanQuery's default max clause count.
+
+ NOTE: This API is experimental and
+ might change in incompatible ways in the next release.
+
+ TrieRangeQuery
):
+
+ Schindler, U, Diepenbroek, M, 2008. + Generic XML-based Framework for Metadata Portals. + Computers & Geosciences 34 (12), 1947-1955. + doi:10.1016/j.cageo.2008.02.023+ + A quote from this paper: Because Apache Lucene is a full-text + search engine and not a conventional database, it cannot handle numerical ranges + (e.g., field value is inside user defined bounds, even dates are numerical values). + We have developed an extension to Apache Lucene that stores + the numerical values in a special string-encoded format with variable precision + (all numerical values like doubles, longs, floats, and ints are converted to + lexicographic sortable string representations and stored with different precisions + (for a more detailed description of how the values are stored, + see {@link NumericUtils}). A range is then divided recursively into multiple intervals for searching: + The center of the range is searched only with the lowest possible precision in the trie, + while the boundaries are matched more exactly. This reduces the number of terms dramatically. + + For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that + uses a lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the + lowest precision. Overall, a range could consist of a theoretical maximum of +
7*255*2 + 255 = 3825
distinct terms (when there is a term for every distinct value of an
+ 8-byte-number in the index and the range covers almost all of them; a maximum of 255 distinct values is used
+ because it would always be possible to reduce the full 256 values to one term with degraded precision).
+ In practice, we have seen up to 300 terms in most cases (index with 500,000 metadata records
+ and a uniform value distribution).
+
+ precisionStep
when encoding values.
+ Lower step values mean more precisions and so more terms in index (and index gets larger).
+ On the other hand, the maximum number of terms to match is reduced, which optimizes query speed.
+ The formula to calculate the maximum term count is:
+ + n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 ) ++ (this formula is only correct, when
bitsPerValue/precisionStep
is an integer;
+ in other cases, the value must be rounded up and the last summand must contain the modulo of the division as
+ precision step).
+ For longs stored using a precision step of 4, n = 15*15*2 + 15 = 465
, and for a precision
+ step of 2, n = 31*3*2 + 3 = 189
. But the faster search speed is reduced by more seeking
+ in the term enum of the index. Because of this, the ideal precisionStep
value can only
+ be found out by testing. Important: You can index with a lower precision step value and test search speed
+ using a multiple of the original step value.
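+ 
+ For example, a minimal sketch (the field name "price", step value 6, and the variables shown
+ are illustrative) of using the same non-default precision step at index and query time:
+ 
+ doc.add(new NumericField("price", 6, Field.Store.NO, true).setLongValue(priceInCents));
+ Query q = NumericRangeQuery.newLongRange("price", 6, minCents, maxCents, true, true);
+ 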
+
+ Good values for precisionStep
depend on usage and data type:
+ precisionStep
is given.precisionStep
). Using {@link NumericField NumericFields} for sorting
+ is ideal, because building the field cache is much faster than with text-only numbers.
+ Sorting is also possible with range query optimized fields using one of the above precisionSteps
.NumericRangeQuery
, that queries a long
+ range using the given precisionStep
.
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeQuery
, that queries a long
+ range using the default precisionStep
{@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeQuery
, that queries an int
+ range using the given precisionStep
.
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeQuery
, that queries an int
+ range using the default precisionStep
{@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeQuery
, that queries a double
+ range using the given precisionStep
.
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeQuery
, that queries a double
+ range using the default precisionStep
{@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeQuery
, that queries a float
+ range using the given precisionStep
.
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ NumericRangeQuery
, that queries a float
+ range using the default precisionStep
{@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
+ You can have half-open ranges (which are in fact </≤ or >/≥ queries)
+ by setting the min or max value to null
. By setting inclusive to false, it will
+ match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
+ true
if the lower endpoint is inclusive true
if the upper endpoint is inclusive false
ends iterating the current enum
+ and forwards to the next sub-range.
+ shift
value (number of bits removed) used
+ during encoding.
+
+ To also index floating point numbers, this class supplies two methods to convert them
+ to integer values by changing their bit layout: {@link #doubleToSortableLong},
+ {@link #floatToSortableInt}. You will have no precision loss by
+ converting floating point numbers to integers and back (only that the integer form
+ is not usable). Other data types like dates can easily be converted to longs or ints (e.g.
+ date to long: {@link java.util.Date#getTime}).
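+ 
+ A minimal sketch of that round trip (the sample value is arbitrary):
+ 
+ long sortable = NumericUtils.doubleToSortableLong(3.5d);
+ double restored = NumericUtils.sortableLongToDouble(sortable); // == 3.5d, no precision lost
+ 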
+
+ For easy usage, the trie algorithm is implemented for indexing inside
+ {@link NumericTokenStream} that can index int
, long
,
+ float
, and double
. For querying,
+ {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query part
+ for the same data types.
+
+ This class can also be used to generate lexicographically sortable (according
+ to {@link String#compareTo(String)}) representations of numeric data types for other
+ usages (e.g. sorting).
+
+ NOTE: This API is experimental and
+ might change in incompatible ways in the next release.
+
+ char[]
buffer size)
+ for encoding long
values.
+ char[]
buffer size)
+ for encoding int
values.
+ SHIFT_START_LONG+shift
in the first character
+ SHIFT_START_INT+shift
in the first character
+ shift
bits.
+ This method is used by {@link NumericTokenStream}.
+ shift
bits.
+ This method is used by {@link LongRangeBuilder}.
+ shift
bits.
+ This method is used by {@link NumericTokenStream}.
+ shift
bits.
+ This method is used by {@link IntRangeBuilder}.
+ double
value to a sortable signed long
.
+ The value is converted by getting its IEEE 754 floating-point "double format"
+ bit layout and then some bits are swapped, to be able to compare the result as a long.
+ This way the precision is not reduced, but the value can easily be used as a long.
+ long
back to a double
.float
value to a sortable signed int
.
+ The value is converted by getting its IEEE 754 floating-point "float format"
+ bit layout and then some bits are swapped, to be able to compare the result as an int.
+ This way the precision is not reduced, but the value can easily be used as an int.
+ int
back to a float
.reader
which share a prefix of
+ length prefixLength
with term
and which have a fuzzy similarity >
+ minSimilarity
.
+
+ After calling the constructor the enumeration is already pointing to the first
+ valid term if such a term exists.
+
+ + editDistance < maximumEditDistance+ Otherwise it returns: +
+ 
+ 1 - (editDistance / length)
+ 
+ where length is the length of the shortest term (text or target) including a
+ prefix that is identical, and editDistance is the Levenshtein distance for
+ the two words.
+ 
+ Embedded within this algorithm is a fail-fast Levenshtein distance
+ algorithm. The fail-fast algorithm differs from the standard Levenshtein
+ distance algorithm in that it is aborted if it is discovered that the
+ minimum distance between the words is greater than some threshold.
+ 
+ To calculate the maximum distance threshold we use the following formula:
+ 
+ (1 - minimumSimilarity) * length
+ 
+ where length is the shortest term including any prefix that is not part of the
+ similarity comparison. This formula was derived by solving for what maximum value
+ of distance returns false for the following statements:
+ 
+ similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
+ return (similarity > minimumSimilarity);
+ 
+ where distance is the Levenshtein distance for the two words.
+ 
+ Levenshtein distance (also known as edit distance) is a measure of similarity
+ between two strings where the distance is measured as the number of character
+ deletions, insertions or substitutions required to transform one string to
+ the other string.
getTerms
+ method. Each location in the array contains the number of times this
+ term occurs in the document or the document field.
+ getTerms
at which the term with the specified
+ term
appears. If this term does not appear in the array,
+ return -1.
+ indexOf(int)
but searches for a number of terms
+ at the same time. Returns an array that has the same size as the number
+ of terms searched for, each slot containing the result of searching for
+ that term number.
+
+ TermFreqVector
to provide additional information about
+ positions in which each of the terms is found. A TermPositionVector does not necessarily
+ contain both positions and offsets, but at least one of these arrays exists.
+ indexOf
method.
+ May return null if positions have not been stored.
+ indexOf
method.
+ true
if the index is optimized; false
otherwise
+ write.lock
could not be
+ obtained)
+ IndexWriter
creates and maintains an index.
+ The create
argument to the {@link
+ #IndexWriter(Directory, Analyzer, boolean) constructor} determines
+ whether a new index is created, or whether an existing index is
+ opened. Note that you can open an index with create=true
+ even while readers are using the index. The old readers will
+ continue to search the "point in time" snapshot they had opened,
+ and won't see the newly created index until they re-open. There are
+ also {@link #IndexWriter(Directory, Analyzer) constructors}
+ with no create
argument which will create a new index
+ if there is not already an index at the provided path and otherwise
+ open the existing index.
+ In either case, documents are added with {@link #AddDocument(Document)
+ addDocument} and removed with {@link #DeleteDocuments(Term)} or {@link
+ #DeleteDocuments(Query)}. A document can be updated with {@link
+ #UpdateDocument(Term, Document) updateDocument} (which just deletes
+ and then adds the entire document). When finished adding, deleting
+ and updating documents, {@link #Close() close} should be called.
+
+ These changes are buffered in memory and periodically
+ flushed to the {@link Directory} (during the above method
+ calls). A flush is triggered when there are enough
+ buffered deletes (see {@link #setMaxBufferedDeleteTerms})
+ or enough added documents since the last flush, whichever
+ is sooner. For the added documents, flushing is triggered
+ either by RAM usage of the documents (see {@link
+ #setRAMBufferSizeMB}) or the number of added documents.
+ The default is to flush when RAM usage hits 16 MB. For
+ best indexing speed you should flush by RAM usage with a
+ large RAM buffer. Note that flushing just moves the
+ internal buffered state in IndexWriter into the index, but
+ these changes are not visible to IndexReader until either
+ {@link #Commit()} or {@link #close} is called. A flush may
+ also trigger one or more segment merges which by default
+ run with a background thread so as not to block the
+ addDocument calls (see below
+ for changing the {@link MergeScheduler}).
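+ 
+ For example, a minimal sketch (the 48 MB figure is illustrative) of flushing by RAM usage:
+ 
+ writer.setRAMBufferSizeMB(48.0); // flush when buffered documents reach roughly 48 MB
+ 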
+
+ The optional autoCommit
argument to the {@link
+ #IndexWriter(Directory, boolean, Analyzer) constructors}
+ controls visibility of the changes to {@link IndexReader}
+ instances reading the same index. When this is
+ false
, changes are not visible until {@link
+ #Close()} or {@link #Commit()} is called. Note that changes will still be
+ flushed to the {@link Directory} as new files, but are
+ not committed (no new segments_N
file is written
+ referencing the new files, nor are the files sync'd to stable storage)
+ until {@link #Close()} or {@link #Commit()} is called. If something
+ goes terribly wrong (for example the JVM crashes), then
+ the index will reflect none of the changes made since the
+ last commit, or the starting state if commit was not called.
+ You can also call {@link #Rollback()}, which closes the writer
+ without committing any changes, and removes any index
+ files that had been flushed but are now unreferenced.
+ This mode is useful for preventing readers from refreshing
+ at a bad time (for example after you've done all your
+ deletes but before you've done your adds). It can also be
+ used to implement simple single-writer transactional
+ semantics ("all or none"). You can do a two-phase commit
+ by calling {@link #PrepareCommit()}
+ followed by {@link #Commit()}. This is necessary when
+ Lucene is working with an external resource (for example,
+ a database) and both must either commit or rollback the
+ transaction.
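+ 
+ A minimal sketch of that pattern (the external "db" resource and the ordering shown are
+ illustrative assumptions):
+ 
+ writer.prepareCommit();
+ try {
+   db.commit();       // commit the external resource
+   writer.commit();   // then make the Lucene changes visible
+ } catch (Exception e) {
+   writer.rollback(); // abandon the prepared commit if anything fails
+   throw new RuntimeException(e);
+ }
+ 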
+ When autoCommit
is true
then
+ the writer will periodically commit on its own. [Deprecated: Note that in 3.0, IndexWriter will
+ no longer accept autoCommit=true (it will be hardwired to
+ false). You can always call {@link #Commit()} yourself
+ when needed]. There is
+ no guarantee when exactly an auto commit will occur (it
+ used to be after every flush, but it is now after every
+ completed merge, as of 2.4). If you want to force a
+ commit, call {@link #Commit()}, or, close the writer. Once
+ a commit has finished, newly opened {@link IndexReader} instances will
+ see the changes to the index as of that commit. When
+ running in this mode, be careful not to refresh your
+ readers while optimize or segment merges are taking place
+ as this can tie up substantial disk space.
+ autoCommit
, an {@link
+ IndexReader} or {@link Lucene.Net.Search.IndexSearcher} will only see the
+ index as of the "point in time" that it was opened. Any
+ changes committed to the index after the reader was opened
+ are not visible until the reader is re-opened.
+ If an index will not have more documents added for a while and optimal search
+ performance is desired, then either the full {@link #Optimize() optimize}
+ method or partial {@link #Optimize(int)} method should be
+ called before the index is closed.
+ Opening an IndexWriter
creates a lock file for the directory in use. Trying to open
+ another IndexWriter
on the same directory will lead to a
+ {@link LockObtainFailedException}. The {@link LockObtainFailedException}
+ is also thrown if an IndexReader on the same directory is used to delete documents
+ from the index.
+ IndexWriter
allows an optional
+ {@link IndexDeletionPolicy} implementation to be
+ specified. You can use this to control when prior commits
+ are deleted from the index. The default policy is {@link
+ KeepOnlyLastCommitDeletionPolicy} which removes all prior
+ commits as soon as a new commit is done (this matches
+ behavior before 2.2). Creating your own policy can allow
+ you to explicitly keep previous "point in time" commits
+ alive in the index for some time, to allow readers to
+ refresh to the new commit without having the old commit
+ deleted out from under them. This is necessary on
+ filesystems like NFS that do not support "delete on last
+ close" semantics, which Lucene's "point in time" search
+ normally relies on.
+ Expert:
+ IndexWriter
allows you to separately change
+ the {@link MergePolicy} and the {@link MergeScheduler}.
+ The {@link MergePolicy} is invoked whenever there are
+ changes to the segments in the index. Its role is to
+ select which merges to do, if any, and return a {@link
+ MergePolicy.MergeSpecification} describing the merges. It
+ also selects merges to do for optimize(). (The default is
+ {@link LogByteSizeMergePolicy}.) Then, the {@link
+ MergeScheduler} is invoked with the requested merges and
+ it decides when and how to run the merges. The default is
+ {@link ConcurrentMergeScheduler}.
+ NOTE: if you hit an
+ OutOfMemoryError then IndexWriter will quietly record this
+ fact and block all future segment commits. This is a
+ defensive measure in case any internal state (buffered
+ documents and deletions) were corrupted. Any subsequent
+ calls to {@link #Commit()} will throw an
+ IllegalStateException. The only course of action is to
+ call {@link #Close()}, which internally will call {@link
+ #Rollback()}, to undo any changes to the index since the
+ last commit. If you opened the writer with autoCommit
+ false you can also just call {@link #Rollback()}
+ directly.
+ NOTE: {@link
+ IndexWriter
} instances are completely thread
+ safe, meaning multiple threads can call any of its
+ methods, concurrently. If your application requires
+ external synchronization, you should not
+ synchronize on the IndexWriter
instance as
+ this may cause deadlock; use your own (non-Lucene) objects
+ instead.
+ numUniqueTerms/interval
terms are read into
+ memory by an IndexReader, and, on average, interval/2
terms
+ must be scanned for each random term access.
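+ 
+ For example, a minimal sketch (the value 256 is illustrative) of trading term-index memory for
+ lookup speed:
+ 
+ writer.setTermIndexInterval(256); // larger interval: smaller in-memory term index, slower random term lookups
+ 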
+
+ path
.
+ Text will be analyzed with a
. If create
+ is true, then a new, empty index will be created in
+ path
, replacing the index already there,
+ if any.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ Maximum field length in number of tokens/terms: LIMITED, UNLIMITED, or user-specified
+ via the MaxFieldLength constructor.
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ path
.
+ Text will be analyzed with a
. If create
+ is true, then a new, empty index will be created in
+ path
, replacing the index already there, if any.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ path
.
+ Text will be analyzed with a
. If create
+ is true, then a new, empty index will be created in
+ path
, replacing the index already there, if any.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
+ via the MaxFieldLength constructor.
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ path
.
+ Text will be analyzed with a
. If create
+ is true, then a new, empty index will be created in
+ path
, replacing the index already there, if any.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ d
.
+ Text will be analyzed with a
. If create
+ is true, then a new, empty index will be created in
+ d
, replacing the index already there, if any.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ Maximum field length in number of terms/tokens: LIMITED, UNLIMITED, or user-specified
+ via the MaxFieldLength constructor.
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ d
.
+ Text will be analyzed with a
. If create
+ is true, then a new, empty index will be created in
+ d
, replacing the index already there, if any.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ path
, first creating it if it does not
+ already exist. Text will be analyzed with
+ a
.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ write.lock
could not
+ be obtained)
+ path
, first creating it if it does not
+ already exist. Text will be analyzed with
+ a
.
+
+ write.lock
could not
+ be obtained)
+ path
, first creating it if it does not
+ already exist. Text will be analyzed with
+ a
.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ write.lock
could not
+ be obtained)
+ path
, first creating it if it does not
+ already exist. Text will be analyzed with
+ a
.
+
+ write.lock
could not
+ be obtained)
+ d
, first creating it if it does not
+ already exist. Text will be analyzed with
+ a
.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ write.lock
could not
+ be obtained)
+ d
, first creating it if it does not
+ already exist. Text will be analyzed with
+ a
.
+
+ write.lock
could not
+ be obtained)
+ d
, first creating it if it does not
+ already exist. Text will be analyzed with
+ a
.
+
+ write.lock
could not
+ be obtained)
+ d
.
+ Text will be analyzed with a
. If create
+ is true, then a new, empty index will be created in
+ d
, replacing the index already there, if any.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ d
,
+ first creating it if it does not already exist. Text
+ will be analyzed with a
.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ write.lock
could not
+ be obtained)
+ d
,
+ first creating it if it does not already exist. Text
+ will be analyzed with a
.
+
+ write.lock
could not
+ be obtained)
+ d
.
+ Text will be analyzed with a
. If
+ create
is true, then a new, empty index
+ will be created in d
, replacing the index
+ already there, if any.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ see above
+
+ {@link Lucene.Net.Index.IndexWriter.MaxFieldLength}, whether or not to limit field lengths. Value is in number of terms/tokens
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ d
.
+ Text will be analyzed with a
. If
+ create
is true, then a new, empty index
+ will be created in d
, replacing the index
+ already there, if any.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ see above
+
+ whether or not to limit field lengths, value is in number of terms/tokens. See {@link Lucene.Net.Index.IndexWriter.MaxFieldLength}.
+
+ the {@link DocConsumer} chain to be used to
+ process documents
+
+ which commit to open
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ d
.
+ Text will be analyzed with a
. If
+ create
is true, then a new, empty index
+ will be created in d
, replacing the index
+ already there, if any.
+
+ true
to create the index or overwrite
+ the existing one; false
to append to the existing
+ index
+
+ see above
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ d
. Text will be analyzed
+ with a
.
+
+ This is only meaningful if you've used a {@link
+ IndexDeletionPolicy} in that past that keeps more than
+ just the last commit.
+
+ This operation is similar to {@link #Rollback()},
+ except that method can only rollback what's been done
+ with the current instance of IndexWriter since its last
+ commit, whereas this method can rollback to an
+ arbitrary commit point from the past, assuming the
+ {@link IndexDeletionPolicy} has preserved past
+ commits.
+
+ NOTE: autoCommit (see above) is set to false with this
+ constructor.
+
+ write.lock
could not
+ be obtained)
+ create
is
+ false
or if there is any other low-level
+ IO error
+ int
s when managing its
+ internal storage, the absolute maximum value for this setting is somewhat
+ less than 2048 MB. The precise limit depends on various factors, such as
+ how large your documents are, how many fields have norms, etc., so it's
+ best to set this value comfortably under 2048.
+
+
+ The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
+
+ + try { + writer.close(); + } finally { + if (IndexWriter.isLocked(directory)) { + IndexWriter.unlock(directory); + } + } ++ + after which, you must be certain not to use the writer + instance anymore. + + NOTE: if this method hits an OutOfMemoryError + you should immediately close the writer, again. See above for details. + +
term
.
+
+ NOTE: if this method hits an OutOfMemoryError
+ you should immediately close the writer. See above for details.
+
+ term
and then adding the new
+ document. The delete and then add are atomic as seen
+ by a reader on the same index (flush may happen only after
+ the add).
+
+ NOTE: if this method hits an OutOfMemoryError
+ you should immediately close the writer. See above for details.
+
+ term
and then adding the new
+ document. The delete and then add are atomic as seen
+ by a reader on the same index (flush may happen only after
+ the add).
+
+ NOTE: if this method hits an OutOfMemoryError
+ you should immediately close the writer. See above for details.
+
+ IndexWriter
without committing
+ any changes that have occurred since the last commit
+ (or since it was opened, if commit hasn't been called).
+ This removes any temporary files that had been created,
+ after which the state of the index will be the same as
+ it was when commit() was last called or when this
+ writer was first opened. This can only be called when
+ this IndexWriter was opened with
+ autoCommit=false
. This also clears a
+ previous call to {@link #prepareCommit}.
+ autoCommit=true
.
+ true
iff the index in the named directory is
+ currently locked.
+ true
iff the index in the named directory is
+ currently locked.
+ + java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] ++
-fix
: actually write a new segments_N file, removing any problematic segments-segment X
: only check the specified
+ segment(s). This can be specified multiple times,
+ to check more than one segment, eg -segment _2
+ -segment _a
. You can't use this with the -fix
+ option.-fix
should only be used on an emergency basis as it will cause
+ documents (perhaps many) to be permanently removed from the index. Always make
+ a backup copy of your index before running this! Do not run this tool on an index
+ that is actively being written to. You have been warned!
+ Run without -fix, this tool will open the index, report version information
+ and report any exceptions it hits and what action it would take if -fix were
+ specified. With -fix, this tool will remove any segments that have issues and
+ write a new segments_N file. This means all documents contained in the affected
+ segments will be removed.
+
+ This tool exits with exit code 1 if the index cannot be opened or has any
+ corruption, else 0.
+ + document.add(new NumericField(name).setIntValue(value)); ++ + For optimal performance, re-use the +
NumericField
and {@link Document} instance for more than
+ one document:
+
+ + NumericField field = new NumericField(name); + Document document = new Document(); + document.add(field); + + for(all documents) { + ... + field.setIntValue(value) + writer.addDocument(document); + ... + } ++ + The java native types
int
, long
,
+ float
and double
are
+ directly supported. However, any value that can be
+ converted into these native types can also be indexed.
+ For example, date/time values represented by a
+ {@link java.util.Date} can be translated into a long
+ value using the {@link java.util.Date#getTime} method. If you
+ don't need millisecond precision, you can quantize the
+ value, either by dividing the result of
+ {@link java.util.Date#getTime} or using the separate getters
+ (for year, month, etc.) to construct an int
or
+ long
value.
+
+ To perform range querying or filtering against a
+ NumericField
, use {@link NumericRangeQuery} or {@link
+ NumericRangeFilter}. To sort according to a
+ NumericField
, use the normal numeric sort types, eg
+ {@link SortField#INT} (note that {@link SortField#AUTO}
+ will not work with these fields). NumericField
values
+ can also be loaded directly from {@link FieldCache}.
+
+ By default, a NumericField
's value is not stored but
+ is indexed for range filtering and sorting. You can use
+ the {@link #NumericField(String,Field.Store,boolean)}
+ constructor if you need to change these defaults.
+
+ You may add the same field name as a NumericField
to
+ the same document more than once. Range querying and
+ filtering will be the logical OR of all values; so a range query
+ will hit all documents that have at least one value in
+ the range. However sort behavior is not defined. If you need to sort,
+ you should separately index a single-valued NumericField
.
+
+ A NumericField
will consume somewhat more disk space
+ in the index than an ordinary single-valued field.
+ However, for a typical index that includes substantial
+ textual content per document, this increase will likely
+ be in the noise.
+
+ Within Lucene, each numeric value is indexed as a
+ trie structure, where each term is logically
+ assigned to larger and larger pre-defined brackets (which
+ are simply lower-precision representations of the value).
+ The step size between each successive bracket is called the
+ precisionStep
, measured in bits. Smaller
+ precisionStep
values result in larger number
+ of brackets, which consumes more disk space in the index
+ but may result in faster range search performance. The
+ default value, 4, was selected for a reasonable tradeoff
+ of disk space consumption versus performance. You can
+ use the expert constructor {@link
+ #NumericField(String,int,Field.Store,boolean)} if you'd
+ like to change the value. Note that you must also
+ specify a congruent value when creating {@link
+ NumericRangeQuery} or {@link NumericRangeFilter}.
+ For low cardinality fields larger precision steps are good.
+ If the cardinality is < 100, it is fair
+ to use {@link Integer#MAX_VALUE}, which produces one
+ term per value.
+
+ For more information on the internals of numeric trie
+ indexing, including the precisionStep
+ configuration, see {@link NumericRangeQuery}. The format of
+ indexed values is described in {@link NumericUtils}.
+
+ If you only need to sort by numeric value, and never
+ run range querying/filtering, you can index using a
+ precisionStep
of {@link Integer#MAX_VALUE}.
+ This will minimize disk space consumed.
+
+ More advanced users can instead use {@link
+ NumericTokenStream} directly, when indexing numbers. This
+ class is a wrapper around this token stream type for
+ easier, more intuitive usage.
+
+ NOTE: This class is only used during
+ indexing. When retrieving the stored field value from a
+ {@link Document} instance after search, you will get a
+ conventional {@link Fieldable} instance where the numeric
+ values are returned as {@link String}s (according to
+ toString(value)
of the used data type).
+
+ NOTE: This API is
+ experimental and might change in incompatible ways in the
+ next release.
+
+ precisionStep
+ {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
+ a numeric value; before indexing a document containing this field,
+ set a value using the various set???Value() methods.
+ This constructor creates an indexed, but not stored field.
+ precisionStep
+ {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
+ a numeric value; before indexing a document containing this field,
+ set a value using the various set???Value() methods.
+ toString(value)
of the used data type)
+
+ if the field should be indexed using {@link NumericTokenStream}
+
+ precisionStep
. The instance is not yet initialized with
+ a numeric value; before indexing a document containing this field,
+ set a value using the various set???Value() methods.
+ This constructor creates an indexed, but not stored field.
+ precisionStep
. The instance is not yet initialized with
+ a numeric value; before indexing a document containing this field,
+ set a value using the various set???Value() methods.
+ toString(value)
of the used data type)
+
+ if the field should be indexed using {@link NumericTokenStream}
+
+ null
for numeric fields null
for numeric fields null
for numeric fields null
if not yet initialized. long
value. document.add(new NumericField(name, precisionStep).setLongValue(value))
+ int
value. document.add(new NumericField(name, precisionStep).setIntValue(value))
+ double
value. document.add(new NumericField(name, precisionStep).setDoubleValue(value))
+ float
value. document.add(new NumericField(name, precisionStep).setFloatValue(value))
+ write.lock
+ could not be released.
+ t
. o
is equal to this. Searchables
.
+
+ Applications usually need only call the inherited {@link #Search(Query)}
+ or {@link #Search(Query,Filter)} methods.
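+
+ A sketch of typical usage (assuming MultiSearcher; a parallel variant is
+ constructed the same way, and the two directories and their contents are
+ assumed to exist):
+
+   Searchable[] searchables = new Searchable[] {
+       new IndexSearcher(dirA, true), new IndexSearcher(dirB, true) };
+   MultiSearcher multi = new MultiSearcher(searchables);
+   TopDocs hits = multi.search(new TermQuery(new Term("body", "lucene")), 10);
+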
+ n
in the array
+ used to construct this searcher.
+ n
within its
+ sub-index.
+ null
.
+
+ Filter to apply to query results, cannot be null
.
+
+ o
is equal to this. IndexModifier
object
+ on the same directory at the same time.
+
+ Example usage:
+
+ Analyzer analyzer = new StandardAnalyzer();
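+ // The rest of the example is a sketch (not the original wording); the index
+ // path and field names are illustrative:
+ IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
+ Document doc = new Document();
+ doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.ANALYZED));
+ indexModifier.addDocument(doc);
+ int deleted = indexModifier.deleteDocuments(new Term("id", "1"));
+ System.out.println("Deleted " + deleted + " document(s)");
+ indexModifier.flush();
+ indexModifier.close();
+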
IndexModifier
.
+
+ Although an instance of this class can be used from more than one
+ thread, you will not get the best performance. You might want to use
+ IndexReader and IndexWriter directly for that (but you will then need to
+ handle synchronization yourself).
+
+ While you can freely mix calls to add() and delete() using this class,
+ you should batch your calls for best performance. For example, if you
+ want to update 20 documents, you should first delete all those documents,
+ then add all the new documents.
+
+ true
to create the index or overwrite the existing one;
+ false
to append to the existing index
+
+ write.lock
could not
+ be obtained)
+ true
to create the index or overwrite the existing one;
+ false
to append to the existing index
+
+ write.lock
could not
+ be obtained)
+ true
to create the index or overwrite the existing one;
+ false
to append to the existing index
+
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ term
.
+ This is useful if one uses a document field to hold a unique ID string for
+ the document. Then to delete such a document, one merely constructs a
+ term with the appropriate field and the unique ID string as its text and
+ passes it to this method. Returns the number of documents deleted.
+ write.lock
could not
+ be obtained)
+ docNum
.write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ write.lock
could not
+ be obtained)
+ Resolution.DAY
or lower.
+
+
+ Another approach is {@link NumericUtils}, which provides
+ a sortable binary representation (prefix encoded) of numeric values, which
+ date/time values are.
+ For indexing a {@link Date} or {@link Calendar}, just get the Unix timestamp as a
+ long
using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and
+ index this as a numeric value with {@link NumericField}
+ and use {@link NumericRangeQuery} to query it.
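+
+ A short sketch of both approaches (field names are illustrative):
+
+   Date date = new Date();
+   // DateTools: day-granularity string, usable with ordinary term range queries
+   doc.add(new Field("created",
+       DateTools.dateToString(date, DateTools.Resolution.DAY),
+       Field.Store.YES, Field.Index.NOT_ANALYZED));
+   // NumericField: raw timestamp, queried with NumericRangeQuery
+   doc.add(new NumericField("createdMillis").setLongValue(date.getTime()));
+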
+ yyyyMMddHHmmssSSS
or shorter,
+ depending on resolution
; using GMT as timezone
+ yyyyMMddHHmmssSSS
or shorter,
+ depending on resolution
; using GMT as timezone
+ timeToString
or
+ DateToString
back to a time, represented as the
+ number of milliseconds since January 1, 1970, 00:00:00 GMT.
+
+ dateString
is not in the timeToString
or
+ DateToString
back to a time, represented as a
+ Date object.
+
+ dateString
is not in the 2004-09-21 13:50:11
+ will be changed to 2004-09-01 00:00:00
when using
+ Resolution.MONTH
.
+
+ resolution
+ set to 0 or 1
+ 1095767411000
+ (which represents 2004-09-21 13:50:11) will be changed to
+ 1093989600000
(2004-09-01 00:00:00) when using
+ Resolution.MONTH
.
+
+ resolution
+ set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT
word\tstem
+ (i.e. two tab-separated words)
+
+
aState
false
, iff there was new input.
+
+ WildcardTermEnum
.
+
+ After calling the constructor the enumeration is already pointing to the first
+ valid term if such a term exists.
+ numHits
, and fill the array with sentinel
+ objects.
+ collator
parameter will cause every single
+ index Term in the Field referenced by lowerTerm and/or upperTerm to be
+ examined. Depending on the number of index Terms in this Field, the
+ operation could be very slow.
+
+ fieldName
matching
+ less than or equal to upperTerm
.
+ fieldName
matching
+ greater than or equal to lowerTerm
.
+ true
if the lower endpoint is inclusive true
if the upper endpoint is inclusive include
which
+ have no overlap with spans from exclude
.
+ o
is equal to this. slop
total unmatched positions between
+ them. When inOrder
is true, the spans from each clause
+ must be ordered as in clauses
.
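+
+ For example (a sketch; the field and terms are illustrative), spans where
+ "apache" is followed within 3 positions, in order, by "lucene", excluding
+ matches that overlap a span for "foundation":
+
+   SpanQuery apache = new SpanTermQuery(new Term("body", "apache"));
+   SpanQuery lucene = new SpanTermQuery(new Term("body", "lucene"));
+   SpanQuery near = new SpanNearQuery(new SpanQuery[] { apache, lucene }, 3, true);
+   Query q = new SpanNotQuery(near, new SpanTermQuery(new Term("body", "foundation")));
+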
+ o
is equal to this. getShorts()
and makes those values
+ available as other numeric types, casting as needed.
+
+
+ WARNING: The status of the Search.Function package is experimental.
+ The APIs introduced here might change in the future and will not be
+ supported anymore in such a case.
+
+ o
is equal to this.
+ ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
+
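+ For example (a sketch using the search.function classes; field names are
+ illustrative), a text query whose score is multiplied by the value of an
+ indexed "popularity" field:
+
+   Query subQuery = new TermQuery(new Term("body", "lucene"));
+   ValueSourceQuery boostByField = new FieldScoreQuery("popularity", FieldScoreQuery.Type.FLOAT);
+   Query q = new CustomScoreQuery(subQuery, boostByField);
+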
+ The Javadocs for File.createNewFile
contain a vague
+ yet spooky warning about not using the API for file
+ locking. This warning was added due to this
+ bug, and in fact the only known problem with using
+ this API for locking is that the Lucene write lock may
+ not be released when the JVM exits abnormally.
+ When this happens, a {@link LockObtainFailedException}
+ is hit when trying to create a writer, in which case you
+ need to explicitly clear the lock file first. You can
+ either manually remove the file, or use the {@link
+ org.apache.lucene.index.IndexReader#unlock(Directory)}
+ API. But, first be certain that no writer is in fact
+ writing to the index otherwise you can easily corrupt
+ your index.
+
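+ A sketch of clearing a stale lock (assuming the IndexWriter.isLocked/unlock
+ helpers; depending on the Lucene version, the unlock method may instead live
+ on IndexReader as referenced above). Only do this once you are certain no
+ writer is open on the index:
+
+   Directory dir = FSDirectory.open(new File("/path/to/index"));
+   if (IndexWriter.isLocked(dir)) {
+       IndexWriter.unlock(dir);
+   }
+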
+ If you suspect that this or any other LockFactory is
+ not working properly in your environment, you can easily
+ test it by using {@link VerifyingLockFactory}, {@link
+ LockVerifyServer} and {@link LockStressTest}.
+
t1 t2 t3
+ with slop at least 1, the fragment:
+ t1 t2 t1 t3 t2 t3
+ matches twice:
+ t1 t2 .. t3
+ t1 .. t2 t3
+
+ Expert:
+ Only public for subclassing. Most implementations should not need this class
+
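+ The sloppy-phrase example above corresponds to a query like the following
+ sketch (the field name is illustrative):
+
+   PhraseQuery pq = new PhraseQuery();
+   pq.add(new Term("f", "t1"));
+   pq.add(new Term("f", "t2"));
+   pq.add(new Term("f", "t3"));
+   pq.setSlop(1);
+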
state.getBoost()*lengthNorm(numTerms)
, where
+ numTerms
is {@link FieldInvertState#GetLength()} if {@link
+ #setDiscountOverlaps} is false, else it's {@link
+ FieldInvertState#GetLength()} - {@link
+ FieldInvertState#GetNumOverlap()}.
+
+ WARNING: This API is new and experimental, and may suddenly
+ change.
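+
+ For instance, a subclass can discard length normalization while keeping the
+ other defaults listed below (a sketch, assuming the overridable lengthNorm
+ of DefaultSimilarity):
+
+   public class NoLengthNormSimilarity extends DefaultSimilarity {
+     public float lengthNorm(String fieldName, int numTerms) {
+       return 1.0f; // default is 1/sqrt(numTerms)
+     }
+   }
+   // ...
+   writer.setSimilarity(new NoLengthNormSimilarity());
+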
+ 1/sqrt(numTerms)
. 1/sqrt(sumOfSquaredWeights)
. sqrt(freq)
. 1 / (distance + 1)
. log(numDocs/(docFreq+1)) + 1
. overlap / maxOverlap
. super()
in order to filter which
+ documents are collected.
+
+ super()
in order to filter which
+ documents are collected.
+
+ Scorer
for documents matching a Term
.TermScorer
.
+
+ Term
in the query.
+
+ An iterator over the documents matching the Term
.
+
+ The Similarity
implementation to be used for score
+ computations.
+
+ The field norms of the document fields for the Term
.
+
+ TermScorer
. match
whose end
+ position is less than or equal to end
.
+ query
.
+ lowerTerm
but less than upperTerm
.
+ There must be at least one term and either term may be null,
+ in which case there is no bound on that side, but if there are
+ two terms, both terms must be for the same field.
+
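+ For example, assuming this is the TermRangeQuery constructor (a sketch; the
+ field and terms are illustrative), all terms in "surname" between "smith"
+ and "taylor", inclusive at both ends:
+
+   Query q = new TermRangeQuery("surname", "smith", "taylor", true, true);
+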
+ lowerTerm
and
+ upperTerm
will themselves be included in the range.
+
+ lowerTerm
but less than upperTerm
.
+ There must be at least one term and either term may be null,
+ in which case there is no bound on that side, but if there are
+ two terms, both terms must be for the same field.
+
+ If collator
is not null, it will be used to decide whether
+ index terms are within the given range, rather than using the Unicode code
+ point order in which index terms are stored.
+
+ WARNING: Using this constructor and supplying a non-null
+ value in the collator
parameter will cause every single
+ index Term in the Field referenced by lowerTerm and/or upperTerm to be
+ examined. Depending on the number of index Terms in this Field, the
+ operation could be very slow.
+
+ lowerTerm
and
+ upperTerm
will themselves be included in the range.
+
+ The collator to use to collate index Terms, to determine
+ their membership in the range bounded by lowerTerm
and
+ upperTerm
.
+
+ true
if the range query is inclusive o
is equal to this. null
.
+ This is to handle the case using ParallelMultiSearcher where the
+ original list contains AUTO and we don't know the actual sort
+ type until the values come back. The fields can only be set once.
+ This method is thread safe.
+ null
. The collators
+ correspond to any SortFields which were given a specific locale.
+ null
.
+ a
is less relevant than b
.true
if document a
should be sorted after document b
.
+ Searchables
.
+
+ Applications usually need only call the inherited {@link #Search(Query)}
+ or {@link #Search(Query,Filter)} methods.
+ o
is equal to this. long
using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and
+ index this as a numeric value with {@link NumericField}
+ and use {@link NumericRangeQuery} to query it.
+
+ write.lock
+ could not be acquired. This
+ happens when a writer tries to open an index
+ that another writer already has open.
+ query
.
+ o
is equal to this. null
as its
+ detail message. The cause is not initialized, and may subsequently be
+ initialized by a call to {@link #innerException}.
+ cause
is not automatically incorporated in
+ this runtime exception's detail message.
+
+ Field[]
array
+ Fieldable[]
array
+ String[]
of field values
+ byte[][]
of binary field values
+ null
+ if no binary fields with the specified name are available.
+ There may be non-binary fields with the same name.
+
+ byte[]
containing the binary field value or null
+	{
+
+		public Document BuildDocument(Quote quote) {
+			var document = new Document();
+			document.Add(new Field("id", quote.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
+			document.Add(new Field("text", quote.Text, Field.Store.YES, Field.Index.ANALYZED));
+			return document;
+		}
+
+		public Term GetIdTerm(Quote quote) {
+			return new Term("id", quote.Id.ToString());
+		}
+	}
+}
\ No newline at end of file
diff --git a/Src/VideoGameQuotes.Api/Search/SearchIndexException.cs b/Src/VideoGameQuotes.Api/Search/SearchIndexException.cs
new file mode 100644
index 0000000..ad275c4
--- /dev/null
+++ b/Src/VideoGameQuotes.Api/Search/SearchIndexException.cs
@@ -0,0 +1,10 @@
+using System;
+
+namespace VideoGameQuotes.Api.Search {
+	/// <summary>
+	/// Raised when an error occurs while reading/writing a search index
+	/// </summary>
+	public class SearchIndexException : Exception {
+		public SearchIndexException(string message = null, Exception innerException = null) : base(message, innerException) { }
+	}
+}
\ No newline at end of file
diff --git a/Src/VideoGameQuotes.Api/Search/SearchResult.cs b/Src/VideoGameQuotes.Api/Search/SearchResult.cs
index 5d6c37c..1605de8 100644
--- a/Src/VideoGameQuotes.Api/Search/SearchResult.cs
+++ b/Src/VideoGameQuotes.Api/Search/SearchResult.cs
@@ -2,7 +2,7 @@
 	/// <summary>
 	/// Represents a search result
 	/// </summary>
-	public class SearchResult {
+	public class SearchResult<T> {
 		/// <summary>
 		/// A value (between 0 and 1, the higher the better) representing how good
 		/// the match is between the search query and the value
@@ -12,6 +12,6 @@
 		/// <summary>
 		/// The matched quote
 		/// </summary>
-		public Quote Quote { get; set; }
+		public T Entity { get; set; }
 	}
 }
\ No newline at end of file
diff --git a/Src/VideoGameQuotes.Api/VideoGameQuotes.Api.csproj b/Src/VideoGameQuotes.Api/VideoGameQuotes.Api.csproj
index fb9a549..409d4a8 100644
--- a/Src/VideoGameQuotes.Api/VideoGameQuotes.Api.csproj
+++ b/Src/VideoGameQuotes.Api/VideoGameQuotes.Api.csproj
@@ -69,10 +69,13 @@
-
+
+
+
-
-
+
+
+
@@ -82,6 +85,8 @@
+
+
@@ -104,6 +109,7 @@
+