001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.Objects; 020 021/** 022 * The interface that describes a Bloom filter that associates a count with each 023 * bit index rather than a bit. This allows reversal of merge operations with 024 * remove operations. 025 * 026 * <p>A counting Bloom filter is expected to function identically to a standard 027 * Bloom filter that is the merge of all the Bloom filters that have been added 028 * to and not later subtracted from the counting Bloom filter. The functional 029 * state of a CountingBloomFilter at the start and end of a series of merge and 030 * subsequent remove operations of the same Bloom filters, irrespective of 031 * remove order, is expected to be the same.</p> 032 * 033 * <p>Removal of a filter that has not previously been merged results in an 034 * invalid state where the cells no longer represent a sum of merged Bloom 035 * filters. It is impossible to validate merge and remove exactly without 036 * explicitly storing all filters. Consequently such an operation may go 037 * undetected. The CountingBloomFilter maintains a state flag that is used as a 038 * warning that an operation was performed that resulted in invalid cells and 039 * thus an invalid state. For example this may occur if a cell for an index was 040 * set to negative following a remove operation.</p> 041 * 042 * <p>Implementations should document the expected state of the filter after an 043 * operation that generates invalid cells, and any potential recovery options. 044 * An implementation may support a reversal of the operation to restore the 045 * state to that prior to the operation. In the event that invalid cells are 046 * adjusted to a valid range then it should be documented if there has been 047 * irreversible information loss.</p> 048 * 049 * <p>Implementations may choose to throw an exception during an operation that 050 * generates invalid cells. Implementations should document the expected state 051 * of the filter after such an operation. For example are the cells not updated, 052 * partially updated or updated entirely before the exception is raised.</p> 053 * 054 * @see CellExtractor 055 * @since 4.5 056 */ 057public interface CountingBloomFilter extends BloomFilter, CellExtractor { 058 059 // Query Operations 060 061 /** 062 * Adds the specified CellExtractor to this Bloom filter. 063 * 064 * <p>Specifically 065 * all cells for the indexes identified by the {@code other} will be incremented 066 * by their corresponding values in the {@code other}.</p> 067 * 068 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 069 * 070 * @param other the CellExtractor to add. 071 * @return {@code true} if the addition was successful and the state is valid 072 * @see #isValid() 073 * @see #subtract(CellExtractor) 074 */ 075 boolean add(CellExtractor other); 076 077 /** 078 * Creates a new instance of the CountingBloomFilter with the same properties as the current one. 079 * @return a copy of this CountingBloomFilter 080 */ 081 @Override 082 CountingBloomFilter copy(); 083 084 /** 085 * Returns the maximum allowable value for a cell count in this Counting filter. 086 * @return the maximum allowable value for a cell count in this Counting filter. 087 */ 088 int getMaxCell(); 089 090 /** 091 * Determines the maximum number of times the BitMapExtractor could have been merged into this 092 * counting filter. 093 * @param bitMapExtractor the BitMapExtractor to provide the indices. 094 * @return the maximum number of times the BitMapExtractor could have been inserted. 095 */ 096 default int getMaxInsert(final BitMapExtractor bitMapExtractor) { 097 if (!contains(bitMapExtractor)) { 098 return 0; 099 } 100 final long[] bitMaps = bitMapExtractor.asBitMapArray(); 101 final int[] max = { Integer.MAX_VALUE }; 102 processCells((x, y) -> { 103 if ((bitMaps[BitMaps.getLongIndex(x)] & BitMaps.getLongBit(x)) != 0) { 104 max[0] = max[0] <= y ? max[0] : y; 105 } 106 return true; 107 }); 108 return max[0]; 109 } 110 111 /** 112 * Determines the maximum number of times the Bloom filter could have been merged 113 * into this counting filter. 114 * @param bloomFilter the Bloom filter the check for. 115 * @return the maximum number of times the Bloom filter could have been inserted. 116 */ 117 default int getMaxInsert(final BloomFilter bloomFilter) { 118 return getMaxInsert((BitMapExtractor) bloomFilter); 119 } 120 121 /** 122 * Determines the maximum number of times the Cell Extractor could have been added. 123 * @param cellExtractor the extractor of cells. 124 * @return the maximum number of times the CellExtractor could have been inserted. 125 */ 126 int getMaxInsert(CellExtractor cellExtractor); 127 128 /** 129 * Determines the maximum number of times the Hasher could have been merged into this 130 * counting filter. 131 * @param hasher the Hasher to provide the indices. 132 * @return the maximum number of times the hasher could have been inserted. 133 */ 134 default int getMaxInsert(final Hasher hasher) { 135 return getMaxInsert(hasher.indices(getShape())); 136 } 137 138 // Modification Operations 139 140 /** 141 * Determines the maximum number of times the IndexExtractor could have been merged 142 * into this counting filter. 143 * <p>To determine how many times an indexExtractor could have been added create a CellExtractor 144 * from the indexExtractor and check that</p> 145 * @param indexExtractor the extractor to drive the count check. 146 * @return the maximum number of times the IndexExtractor could have been inserted. 147 * @see #getMaxInsert(CellExtractor) 148 */ 149 default int getMaxInsert(final IndexExtractor indexExtractor) { 150 return getMaxInsert(CellExtractor.from(indexExtractor.uniqueIndices()) ); 151 } 152 153 /** 154 * Returns {@code true} if the internal state is valid. 155 * 156 * <p>This flag is a warning that an addition or 157 * subtraction of cells from this filter resulted in an invalid cell for one or more 158 * indexes. For example this may occur if a cell for an index was 159 * set to negative following a subtraction operation, or overflows the value specified by {@code getMaxCell()} following an 160 * addition operation.</p> 161 * 162 * <p>A counting Bloom filter that has an invalid state is no longer ensured to function 163 * identically to a standard Bloom filter instance that is the merge of all the Bloom filters 164 * that have been added to and not later subtracted from this counting Bloom filter.</p> 165 * 166 * <p>Note: The change to an invalid state may or may not be reversible. Implementations 167 * are expected to document their policy on recovery from an addition or removal operation 168 * that generated an invalid state.</p> 169 * 170 * @return {@code true} if the state is valid 171 */ 172 boolean isValid(); 173 174 /** 175 * Merges the specified BitMap extractor into this Bloom filter. 176 * 177 * <p>Specifically: all cells for the indexes identified by the {@code bitMapExtractor} will be incremented by 1.</p> 178 * 179 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 180 * 181 * @param bitMapExtractor the BitMapExtractor 182 * @return {@code true} if the removal was successful and the state is valid 183 * @see #isValid() 184 * @see #add(CellExtractor) 185 */ 186 @Override 187 default boolean merge(final BitMapExtractor bitMapExtractor) { 188 return merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor)); 189 } 190 191 /** 192 * Merges the specified Bloom filter into this Bloom filter. 193 * 194 * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be incremented by 1.</p> 195 * 196 * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an 197 * IndexExtractor.</p> 198 * 199 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 200 * 201 * @param other the other Bloom filter 202 * @return {@code true} if the removal was successful and the state is valid 203 * @see #isValid() 204 * @see #add(CellExtractor) 205 */ 206 @Override 207 default boolean merge(final BloomFilter other) { 208 Objects.requireNonNull(other, "other"); 209 return merge((IndexExtractor) other); 210 } 211 212 /** 213 * Merges the specified Hasher into this Bloom filter. 214 * 215 * <p>Specifically: all cells for the unique indexes identified by the {@code hasher} will be incremented by 1.</p> 216 * 217 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 218 * 219 * @param hasher the hasher 220 * @return {@code true} if the removal was successful and the state is valid 221 * @see #isValid() 222 * @see #add(CellExtractor) 223 */ 224 @Override 225 default boolean merge(final Hasher hasher) { 226 Objects.requireNonNull(hasher, "hasher"); 227 return merge(hasher.indices(getShape())); 228 } 229 230 /** 231 * Merges the specified index extractor into this Bloom filter. 232 * 233 * <p>Specifically: all unique cells for the indices identified by the {@code indexExtractor} will be incremented by 1.</p> 234 * 235 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 236 * 237 * <p>Notes:</p> 238 * <ul> 239 * <li>If indices that are returned multiple times should be incremented multiple times convert the IndexExtractor 240 * to a CellExtractor and add that.</li> 241 * <li>Implementations should throw {@code IllegalArgumentException} and no other exception on bad input.</li> 242 * </ul> 243 * @param indexExtractor the IndexExtractor 244 * @return {@code true} if the removal was successful and the state is valid 245 * @see #isValid() 246 * @see #add(CellExtractor) 247 */ 248 @Override 249 default boolean merge(final IndexExtractor indexExtractor) { 250 Objects.requireNonNull(indexExtractor, "indexExtractor"); 251 try { 252 return add(CellExtractor.from(indexExtractor.uniqueIndices())); 253 } catch (final IndexOutOfBoundsException e) { 254 throw new IllegalArgumentException( 255 String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e); 256 } 257 } 258 259 /** 260 * Removes the specified BitMapExtractor from this Bloom filter. 261 * 262 * <p>Specifically all cells for the indices produced by the {@code bitMapExtractor} will be 263 * decremented by 1.</p> 264 * 265 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 266 * 267 * @param bitMapExtractor the BitMapExtractor to provide the indexes 268 * @return {@code true} if the removal was successful and the state is valid 269 * @see #isValid() 270 * @see #subtract(CellExtractor) 271 */ 272 default boolean remove(final BitMapExtractor bitMapExtractor) { 273 return remove(IndexExtractor.fromBitMapExtractor(bitMapExtractor)); 274 } 275 276 /** 277 * Removes the specified Bloom filter from this Bloom filter. 278 * 279 * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be decremented by 1.</p> 280 * 281 * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an 282 * IndexExtractor.</p> 283 * 284 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 285 * 286 * @param other the other Bloom filter 287 * @return {@code true} if the removal was successful and the state is valid 288 * @see #isValid() 289 * @see #subtract(CellExtractor) 290 */ 291 default boolean remove(final BloomFilter other) { 292 return remove((IndexExtractor) other); 293 } 294 295 /** 296 * Removes the unique values from the specified hasher from this Bloom filter. 297 * 298 * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be 299 * decremented by 1.</p> 300 * 301 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 302 * 303 * @param hasher the hasher to provide the indexes 304 * @return {@code true} if the removal was successful and the state is valid 305 * @see #isValid() 306 * @see #subtract(CellExtractor) 307 */ 308 default boolean remove(final Hasher hasher) { 309 Objects.requireNonNull(hasher, "hasher"); 310 return remove(hasher.indices(getShape())); 311 } 312 313 /** 314 * Removes the values from the specified IndexExtractor from the Bloom filter from this Bloom filter. 315 * 316 * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be 317 * decremented by 1.</p> 318 * 319 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 320 * 321 * <p>Note: If indices that are returned multiple times should be decremented multiple times convert the IndexExtractor 322 * to a CellExtractor and subtract that.</p> 323 * 324 * @param indexExtractor the IndexExtractor to provide the indexes 325 * @return {@code true} if the removal was successful and the state is valid 326 * @see #isValid() 327 * @see #subtract(CellExtractor) 328 */ 329 default boolean remove(final IndexExtractor indexExtractor) { 330 Objects.requireNonNull(indexExtractor, "indexExtractor"); 331 try { 332 return subtract(CellExtractor.from(indexExtractor.uniqueIndices())); 333 } catch (final IndexOutOfBoundsException e) { 334 throw new IllegalArgumentException( 335 String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits())); 336 } 337 } 338 339 340 /** 341 * Adds the specified CellExtractor to this Bloom filter. 342 * 343 * <p>Specifically 344 * all cells for the indexes identified by the {@code other} will be decremented 345 * by their corresponding values in the {@code other}.</p> 346 * 347 * <p>This method will return true if the filter is valid after the operation.</p> 348 * 349 * @param other the CellExtractor to subtract. 350 * @return {@code true} if the subtraction was successful and the state is valid 351 * @see #isValid() 352 * @see #add(CellExtractor) 353 */ 354 boolean subtract(CellExtractor other); 355 356 /** 357 * The default implementation is a no-op since the counting bloom filter returns an unique IndexExtractor by default. 358 * @return this counting Bloom filter. 359 */ 360 @Override 361 default IndexExtractor uniqueIndices() { 362 return this; 363 } 364}