00001 /* Copyright 2005 Ben Hyde 00002 * 00003 * Licensed under the Apache License, Version 2.0 (the "License"); 00004 * you may not use this file except in compliance with the License. 00005 * You may obtain a copy of the License at 00006 * 00007 * http://www.apache.org/licenses/LICENSE-2.0 00008 * 00009 * Unless required by applicable law or agreed to in writing, software 00010 * distributed under the License is distributed on an "AS IS" BASIS, 00011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00012 * See the License for the specific language governing permissions and 00013 * limitations under the License. 00014 */ 00015 00016 /** \file ssp_bcount.h 00017 * \brief Silver Spoon's Bloom Counters, a C include file. 00018 * 00019 * This API defines an opaque type ssp_bcount_t which is then used to 00020 * observe a stream of symbols. It count, aproximately, the 00021 * number of times each symbol occurs in the stream during the 00022 * current window. 00023 * 00024 * The counts are a lower bound on the number of times the symbol 00025 * appears. A typical use for this abstraction is to filter a 00026 * huge universe of events to find those events that are happening 00027 * a lot. For example, you could use it to discover the list of 00028 * high volume visitors to your web site. You could use it to find 00029 * the popular phrases in a corpus of text. You could use it to 00030 * find files that should be moved to a cache. 00031 * 00032 * The user must provide only two operations on symbols, two hashs. 00033 * Symbols are represented as void * on the interface. 00034 * 00035 * That algorithum used here is a direct decendent of the one 00036 * described in <>. See also section 8.1 in <> for a overview. 00037 * 00038 */ 00039 00040 #include "ssp_items.h" 00041 #include <apr_pools.h> 00042 00043 /** \brief The type ssp_bcount_t is the cornerstone of this API. 00044 * 00045 * It is the structure that accumlates the counts and manages 00046 * the current window. It is allocated in an apr_pool provided 00047 * by the user and survives until that pool is cleared. The 00048 * functions in this API are all operations upon it. 00049 */ 00050 typedef struct ssp_bcount_s ssp_bcount_t; 00051 00052 00053 /** \brief Create a filter for detecting hot symbols. 00054 * 00055 * Instantiate a fresh ssp_bcount_t in the pool given. The user provides 00056 * two hash functions that will be used on the symbols counted. 00057 * Two parameters filter_scale and filter_scatter configure how 00058 * accurate our estimated counts can be. 00059 * 00060 * The counters are a varient of bloom filters. Unlike a traditional 00061 * bloom filter, which uses a bit map to manage a set, we allocate 00062 * a set of counters instead of a set of bits. The scale sets the 00063 * size of that set. Scale is a small integer defining the number 00064 * of bits used to index the set. So if you pass in 10, you get a 00065 * counter set of 2^10th. 00066 * 00067 * The scatter is how many counters are incremented when ever a 00068 * symbol is noted. It is directly analgous to the number of bits 00069 * set or each member of the set represented in a bloom filter. 00070 * Scatter must less than 32. 00071 */ 00072 extern ssp_bcount_t *ssp_bcount_create(apr_pool_t *pool, 00073 ssp_hash_function_t *h1, 00074 ssp_hash_function_t *h2, 00075 apr_uint16_t filter_scale, 00076 apr_uint16_t filter_scatter); 00077 00078 /** \brief Advance the current 00079 * 00080 * Counts are maintained for the current window only. Given a 00081 * sequence of calls on this routine c7, c8, c9, c10 the current 00082 * interval runs from moment of c9 thru the current moment. For 00083 * example if this is called each hour at the top of the hour and it 00084 * is currently 10:35 the current interval is 9:00..10:35. You can 00085 * clear all counters by making two calls on this. 00086 **/ 00087 extern void ssp_bcount_tick(ssp_bcount_t *h); 00088 00089 /** \brief Count the occurance of a symbol. 00090 * 00091 * Calls on ssp_bcount_insert notify the counters the passed symbol has 00092 * just occured. I.e. it occured during the current window. 00093 **/ 00094 extern apr_uint32_t ssp_bcount_inc(ssp_bcount_t *h, void *symbol); 00095 00096 /** \brief Fetch the estimated count for this symbol. 00097 * 00098 * Does not increment the estimated count. 00099 **/ 00100 apr_uint32_t ssp_bcount_lookup(ssp_bcount_t *h, void *symbol); 00101 00102 00103 /** \brief Rarely used counter state. 00104 * 00105 * While generally you needn't know what's in a ssp_bcount_t there 00106 * are a few applications for direct access. For example to save 00107 * and restore it. Or, for example, you want to use it to estimate 00108 * the maxium over the set of counts. 00109 */ 00110 struct ssp_bcount_s { 00111 apr_pool_t *object_pool; /**< Subpool created to hold this and it's children */ 00112 ssp_hash_function_t *h1; /**< 1st hash function on symbols */ 00113 ssp_hash_function_t *h2; /**< 2nd hash function on symbols */ 00114 apr_uint32_t scale; /**< 2^scale is how big the counter space is. */ 00115 apr_uint32_t scatter; /**< How many counters represent a given symbol */ 00116 apr_uint32_t mask; /**< Used to map hash bits to index value */ 00117 apr_uint32_t *counters; /**< Counters of the current interval */ 00118 apr_uint32_t *next_counters; /**< Counters that become live when we tick. */ 00119 };
1.4.4