Main Page | Data Structures | Directories | File List | Data Fields | Globals

ssp_bcount.h

Go to the documentation of this file.
00001 /* Copyright 2005 Ben Hyde
00002  *
00003  * Licensed under the Apache License, Version 2.0 (the "License");
00004  * you may not use this file except in compliance with the License.
00005  * You may obtain a copy of the License at
00006  *
00007  *     http://www.apache.org/licenses/LICENSE-2.0
00008  *
00009  * Unless required by applicable law or agreed to in writing, software
00010  * distributed under the License is distributed on an "AS IS" BASIS,
00011  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012  * See the License for the specific language governing permissions and
00013  * limitations under the License.
00014  */
00015 
00016 /** \file ssp_bcount.h
00017  * \brief Silver Spoon's Bloom Counters, a C include file.
00018  *
00019  * This API defines an opaque type ssp_bcount_t which is then used to
00020  * observe a stream of symbols.  It count, aproximately, the
00021  * number of times each symbol occurs in the stream during the
00022  * current window.
00023  *
00024  * The counts are a lower bound on the number of times the symbol
00025  * appears.  A typical use for this abstraction is to filter a
00026  * huge universe of events to find those events that are happening
00027  * a lot. For example, you could use it to discover the list of
00028  * high volume visitors to your web site.  You could use it to find
00029  * the popular phrases in a corpus of text.  You could use it to
00030  * find files that should be moved to a cache.
00031  *
00032  * The user must provide only two operations on symbols, two hashs.
00033  * Symbols are represented as void * on the interface.
00034  *
00035  * That algorithum used here is a direct decendent of the one
00036  * described in <>.  See also section 8.1 in <> for a overview.
00037  *
00038  */
00039 
00040 #include "ssp_items.h"
00041 #include <apr_pools.h>
00042 
00043 /** \brief The type ssp_bcount_t is the cornerstone of this API.
00044  *
00045  * It is the structure that accumlates the counts and manages
00046  * the current window.  It is allocated in an apr_pool provided
00047  * by the user and survives until that pool is cleared.  The
00048  * functions in this API are all operations upon it.
00049  */
00050 typedef struct ssp_bcount_s ssp_bcount_t;
00051 
00052 
00053 /** \brief Create a filter for detecting hot symbols.
00054  *
00055  * Instantiate a fresh ssp_bcount_t in the pool given.  The user provides
00056  * two hash functions that will be used on the symbols counted.
00057  * Two parameters filter_scale and filter_scatter configure how
00058  * accurate our estimated counts can be.
00059  *
00060  * The counters are a varient of bloom filters.  Unlike a traditional
00061  * bloom filter, which uses a bit map to manage a set, we allocate
00062  * a set of counters instead of a set of bits.  The scale sets the
00063  * size of that set.  Scale is a small integer defining the number
00064  * of bits used to index the set.  So if you pass in 10, you get a
00065  * counter set of 2^10th.
00066  *
00067  * The scatter is how many counters are incremented when ever a
00068  * symbol is noted.  It is directly analgous to the number of bits
00069  * set or each member of the set represented in a bloom filter.
00070  * Scatter must less than 32.
00071  */
00072 extern ssp_bcount_t *ssp_bcount_create(apr_pool_t *pool,
00073                                        ssp_hash_function_t *h1,
00074                                        ssp_hash_function_t *h2,
00075                                        apr_uint16_t filter_scale,
00076                                        apr_uint16_t filter_scatter);
00077 
00078 /** \brief Advance the current 
00079  *
00080  * Counts are maintained for the current window only.  Given a
00081  * sequence of calls on this routine c7, c8, c9, c10 the current
00082  * interval runs from moment of c9 thru the current moment.  For
00083  * example if this is called each hour at the top of the hour and it
00084  * is currently 10:35 the current interval is 9:00..10:35.  You can
00085  * clear all counters by making two calls on this.
00086  **/
00087 extern void ssp_bcount_tick(ssp_bcount_t *h);
00088 
00089 /** \brief Count the occurance of a symbol.
00090  * 
00091  * Calls on ssp_bcount_insert notify the counters the passed symbol has
00092  * just occured.  I.e.  it occured during the current window.
00093  **/
00094 extern apr_uint32_t ssp_bcount_inc(ssp_bcount_t *h, void *symbol);
00095 
00096 /** \brief Fetch the estimated count for this symbol.
00097  *
00098  * Does not increment the estimated count.
00099  **/
00100 apr_uint32_t ssp_bcount_lookup(ssp_bcount_t *h, void *symbol);
00101 
00102 
00103 /** \brief Rarely used counter state.
00104  *
00105  * While generally you needn't know what's in a ssp_bcount_t there
00106  * are a few applications for direct access.  For example to save
00107  * and restore it.  Or, for example, you want to use it to estimate
00108  * the maxium over the set of counts.
00109  */
00110 struct ssp_bcount_s {
00111   apr_pool_t *object_pool; /**< Subpool created to hold this and it's children */
00112   ssp_hash_function_t *h1;     /**< 1st hash function on symbols */
00113   ssp_hash_function_t *h2;     /**< 2nd hash function on symbols */
00114   apr_uint32_t scale;          /**< 2^scale is how big the counter space is. */
00115   apr_uint32_t scatter;        /**< How many counters represent a given symbol */
00116   apr_uint32_t mask;           /**< Used to map hash bits to index value  */
00117   apr_uint32_t *counters;      /**< Counters of the current interval */
00118   apr_uint32_t *next_counters; /**< Counters that become live when we tick. */
00119 };

Generated on Thu Jul 28 22:20:52 2005 for HTP - a library for realtime detection of hotspots by  doxygen 1.4.4