00001 /* Copyright 2005 Ben Hyde 00002 * 00003 * Licensed under the Apache License, Version 2.0 (the "License"); 00004 * you may not use this file except in compliance with the License. 00005 * You may obtain a copy of the License at 00006 * 00007 * http://www.apache.org/licenses/LICENSE-2.0 00008 * 00009 * Unless required by applicable law or agreed to in writing, software 00010 * distributed under the License is distributed on an "AS IS" BASIS, 00011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00012 * See the License for the specific language governing permissions and 00013 * limitations under the License. 00014 */ 00015 00016 /** \file ssp_bfilter.h 00017 * \brief The API for a bloom filter. 00018 * 00019 * This API defines an opaque type ssp_bfilter_t used to represent a 00020 * set of symbols, but with only two operations on that set. 00021 * membership, and insert. 00022 * 00023 * That algorithum used is known as a Bloom filter, see 00024 * <> for additional info. As such it is possible for the 00025 * membership query to return false positives. 00026 * 00027 * \dot 00028 * digraph { 00029 * rankdir=LR; 00030 * node [shape=plaintext]; 00031 * create -> use -> retire [style=dotted]; 00032 * {rank=same; use; ssp_bfilter_t; bloom_insert; bloom_member_p; } 00033 * {rank=same; create; bloom_make; } 00034 * {rank=same; retire; apr_clear_pool; } 00035 * bloom_make -> ssp_bfilter_t -> apr_clear_pool; 00036 * ssp_bfilter_t -> {bloom_insert; bloom_member_p } -> ssp_bfilter_t; 00037 * { node [shape=rectangle,label="a ssp_bfilter_t"]; ssp_bfilter_t;} 00038 * } 00039 * \enddot 00040 * 00041 */ 00042 00043 #include "ssp_items.h" 00044 #include <apr_pools.h> 00045 00046 /** \brief The type ssp_bfilter_t is the cornerstone of this API. 00047 * 00048 * Instances are allocated in an APR pool and survive until 00049 * that pool is cleared. You may insert symbols into the 00050 * set, and enquire if a symbol is a member of a set. 00051 * 00052 */ 00053 typedef struct ssp_bfilter_s ssp_bfilter_t; 00054 00055 /** \brief Create a ssp_bfilter_t, i.e. bloom filter. 00056 * 00057 * Instantiate a fresh ssp_bfilter_t in the pool given. The user provides 00058 * two hash functions that will be used to map symbols into the 00059 * state of the filter. Two parameters filter_scale and filter_phase 00060 * configure how accurate the filter is. 00061 * 00062 * Scales, a small integer, sets the size of the bit map. The bit 00063 * map is 2^scale in size. While the phase sets how many bits 00064 * are set in the bitmap for each symbol inserted. 00065 */ 00066 extern ssp_bfilter_t *ssp_bfilter_create(apr_pool_t *pool, 00067 ssp_hash_function_t *h1, 00068 ssp_hash_function_t *h2, 00069 apr_uint16_t filter_scale, 00070 apr_uint16_t filter_phase); 00071 00072 /** The number of one bits in the underlying bitmap. 00073 * 00074 * Since 2^scale is the total number of bits you can use this 00075 * to get a feel for how likely false positives are becoming. 00076 * This can be used to compute how saturated the filter is, and 00077 * in combination with the phase estimate the chance for false 00078 * positives. 00079 */ 00080 extern apr_uint32_t ssp_bfilter_density(ssp_bfilter_t *f); 00081 00082 /** \brief Insert a symbol into the filter. 00083 * 00084 * Calls on bloom_insert adds a symbol to the set of symbols 00085 * in the bloom filter. 00086 **/ 00087 extern void ssp_bfilter_insert(ssp_bfilter_t *b, void *symbol); 00088 00089 /** \brief Return false if symbol is not a member of the set. 00090 * 00091 * This never returns a false negative but may occationally 00092 * return a false positive. 00093 **/ 00094 int ssp_bfilter_member_p(ssp_bfilter_t *h, void *symbol); 00095 00096 /** \brief The bloom filter including bitmap, hash functions, etc. etc. 00097 * 00098 * It's unlikely users will need access to the internals of 00099 * ssp_bfilter_t, but it might be useful. 00100 */ 00101 struct ssp_bfilter_s { 00102 apr_pool_t *object_pool; /**< This and it's children reside in this pool */ 00103 ssp_hash_function_t *h1; /**< ... */ 00104 ssp_hash_function_t *h2; /**< ... */ 00105 apr_uint32_t scale; /**< The bit map has 2^scale bits in it */ 00106 apr_uint32_t phase; /**< Each symbol sets this many bits */ 00107 apr_uint32_t mask; /**< 2^scale-1, used to map hash into bitmap index */ 00108 apr_uint64_t *bitmap; /**< The bitmap of scale */ 00109 };
1.4.4