由于需要对较大数量的个体内容的出现进行记录、去重、查找和存储, 便于后续的统计分析。
考虑使用bitmap来实现对个体出现的记录和去重。

bitmap的基础知识

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
var (
	tA = [8]byte{1, 2, 4, 8, 16, 32, 64, 128}
	tB = [8]byte{254, 253, 251, 247, 239, 223, 191, 127}
)

// newSlice creates a new byte slice with length l (in bits).
// The actual size in bits might be up to 7 bits larger because
// they are stored in a byte slice.
func newSlice(l int) []byte {
	remainder := l % 8
	if remainder != 0 {
		remainder = 1
	}
	return make([]byte, l/8+remainder)
}

// Get returns the value of bit i from map m.
// It doesn't check the bounds of the slice.
func get(m []byte, i int) bool {
  return m[i/8]&tA[i%8] != 0
}

// Set sets bit i of map m to value v.
// It doesn't check the bounds of the slice.
func set(m []byte, i int, v bool) {
	index := i / 8
	bit := i % 8
	if v {
		m[index] = m[index] | tA[bit]
	} else {
		m[index] = m[index] & tB[bit]
	}
}

// Len returns the length (in bits) of the provided byteslice.
// It will always be a multipile of 8 bits.
func length(m []byte) int {
	return len(m) * 8
}

// BitMap defines the stored message struct.
type BitMap struct {
	size int
	bits []byte
}

// NewBitMap creates a new bitmap slice.
func NewBitMap(l int) BitMap {
	return BitMap{
		size: l,
		bits: newSlice(l),
	}
}

// Size return bitmap used length.
func (b *BitMap) Size() int {
	return b.size
}

func (b *BitMap) CheckRange(i int) bool {
	if i > b.size || i < 1 {
		return false
	}
	return true
}

func (b *BitMap) Get(i int) bool {
	if i <= 0 || i > b.size {
		return false
	}
	i--
	return get(b.bits, i)
}

func (b *BitMap) Set(i int, v bool) {
	if i <= 0 || i > b.size {
		return
	}
	i--
	set(b.bits, i, v)
}

func (b *BitMap) SetRange(start, end int) {
	for i := start; i <= end; i++ {
		b.Set(i, true)
	}
}

func (b *BitMap) Count() int {
	count := 0
	for i := 0; i < b.Size(); i++ {
		if b.Get(i) {
			count++
		}
	}
	return count
}

bitmap的序列化和反序列化

1
2
3
4
5
6
7
8
func (b *BitMap) ToString() string {
	var sb strings.Builder
	for i := 0; i < len(b.bits); i++ {
		sb.WriteString(strconv.Itoa(int(b.bits[i])))
		sb.WriteString(" ")
	}
	return sb.String()
}
1
2
3
4
5
6
7
8
9
func FromString(input string) BitMap {
	inputSlice := strings.Split(input, " ")
	bits := make([]byte, 0, len(inputSlice))
	for i := 0; i < len(inputSlice)-1; i++ {
		bitInt, _ := strconv.Atoi(inputSlice[i])
		bits = append(bits, uint8(bitInt))
	}
	return BitMap{size: length(bits), bits: bits}
}