golang bloom filter实现

2,006 阅读1分钟
package main

import (
	"fmt"
	"math"
)

type BloomFilter struct {
	MLength   int64   //过滤器长度
	MArr      []int64 //过滤器数组
	NCount    int64   //插入元素个数
	FalseRate float64 //误报率
	KHash     int     //hash函数个数
}

//数学公式
// k = ln2 * m /n
// m = - n log p / (log2 )^2

func main() {

	bf := NewBloomFilter(0.0001, 100)
	fmt.Println(bf)
	bf.set([]byte("zhuang"))
	bf.set([]byte("jing"))
	bf.set([]byte("peng"))

	fmt.Println(bf.check([]byte("aa")))
	fmt.Println(bf)
}

func NewBloomFilter(falseRate float64, size int64) *BloomFilter {
	m, k := getFilterParam(falseRate, size)

	return &BloomFilter{
		MLength:   m,
		MArr:      make([]int64, m),
		NCount:    size,
		FalseRate: falseRate,
		KHash:     k,
	}
}

func getFilterParam(falseRate float64, size int64) (int64, int) {

	m := -1 * math.Log(falseRate) * float64(size) / (math.Ln2 * math.Ln2)
	k := m * math.Ln2 / float64(size)
	return int64(m), int(k)
}

func (bf *BloomFilter) set(data []byte) {
	for i := 0; i < bf.KHash; i++ {
		index := bf.hashFun(data, i)
		bf.MArr[index] = 1
	}
}

func (bf *BloomFilter) check(data []byte) bool {
	for i := 0; i < bf.KHash; i++ {
		index := bf.hashFun(data, i)
		if bf.MArr[index] == 0 {
			return false
		}
	}
	return true
}
func (bf *BloomFilter) hashFun(data []byte, count int) int64 {
	hash := int64(5381)
	for i := 0; i < len(data); i++ {
		hash = hash*33 + int64(data[i]) + int64(count)
	}

	res := hash % (bf.MLength - 1)
	return int64(math.Abs(float64(res)))
}