1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package intsets provides Sparse, a compact and fast representation
6 // for sparse sets of int values.
8 // The time complexity of the operations Len, Insert, Remove and Has
9 // is in O(n) but in practice those methods are faster and more
10 // space-efficient than equivalent operations on sets based on the Go
11 // map type. The IsEmpty, Min, Max, Clear and TakeMin operations
12 // require constant time.
14 package intsets // import "golang.org/x/tools/container/intsets"
17 // - Add InsertAll(...int), RemoveAll(...int)
18 // - Add 'bool changed' results for {Intersection,Difference}With too.
20 // TODO(adonovan): implement Dense, a dense bit vector with a similar API.
21 // The space usage would be proportional to Max(), not Len(), and the
22 // implementation would be based upon big.Int.
24 // TODO(adonovan): opt: make UnionWith and Difference faster.
25 // These are the hot-spots for go/pointer.
32 // A Sparse is a set of int values.
33 // Sparse operations (even queries) are not concurrency-safe.
35 // The zero value for Sparse is a valid empty set.
37 // Sparse sets must be copied using the Copy method, not by assigning
41 // An uninitialized Sparse represents an empty set.
42 // An empty set may also be represented by
43 // root.next == root.prev == &root.
45 // The root is always the block with the smallest offset.
46 // It can be empty, but only if it is the only block; in that case, offset is
47 // MaxInt (which is not a valid offset).
55 bitsPerWord = 8 << (_m>>8&1 + _m>>16&1 + _m>>32&1)
56 bitsPerBlock = 256 // optimal value for go/pointer solver performance
57 wordsPerBlock = bitsPerBlock / bitsPerWord
60 // Limit values of implementation-specific int type.
62 MaxInt = int(^uint(0) >> 1)
66 // -- block ------------------------------------------------------------
68 // A set is represented as a circular doubly-linked list of blocks,
69 // each containing an offset and a bit array of fixed size
70 // bitsPerBlock; the blocks are ordered by increasing offset.
72 // The set contains an element x iff the block whose offset is x - (x
73 // mod bitsPerBlock) has the bit (x mod bitsPerBlock) set, where mod
74 // is the Euclidean remainder.
76 // A block may only be empty transiently.
79 offset int // offset mod bitsPerBlock == 0
80 bits [wordsPerBlock]word // contains at least one set bit
81 next, prev *block // doubly-linked list of blocks
84 // wordMask returns the word index (in block.bits)
85 // and single-bit mask for the block's ith bit.
86 func wordMask(i uint) (w uint, mask word) {
88 mask = 1 << (i % bitsPerWord)
92 // insert sets the block b's ith bit and
93 // returns true if it was not already set.
95 func (b *block) insert(i uint) bool {
96 w, mask := wordMask(i)
97 if b.bits[w]&mask == 0 {
104 // remove clears the block's ith bit and
105 // returns true if the bit was previously set.
106 // NB: may leave the block empty.
108 func (b *block) remove(i uint) bool {
109 w, mask := wordMask(i)
110 if b.bits[w]&mask != 0 {
117 // has reports whether the block's ith bit is set.
118 func (b *block) has(i uint) bool {
119 w, mask := wordMask(i)
120 return b.bits[w]&mask != 0
123 // empty reports whether b.len()==0, but more efficiently.
124 func (b *block) empty() bool {
125 for _, w := range b.bits {
133 // len returns the number of set bits in block b.
134 func (b *block) len() int {
136 for _, w := range b.bits {
142 // max returns the maximum element of the block.
143 // The block must not be empty.
144 func (b *block) max() int {
145 bi := b.offset + bitsPerBlock
146 // Decrement bi by number of high zeros in last.bits.
147 for i := len(b.bits) - 1; i >= 0; i-- {
148 if w := b.bits[i]; w != 0 {
149 return bi - nlz(w) - 1
153 panic("BUG: empty block")
156 // min returns the minimum element of the block,
157 // and also removes it if take is set.
158 // The block must not be initially empty.
159 // NB: may leave the block empty.
160 func (b *block) min(take bool) int {
161 for i, w := range b.bits {
165 b.bits[i] = w &^ (1 << uint(tz))
167 return b.offset + int(i*bitsPerWord) + tz
170 panic("BUG: empty block")
173 // lowerBound returns the smallest element of the block that is greater than or
174 // equal to the element corresponding to the ith bit. If there is no such
175 // element, the second return value is false.
176 func (b *block) lowerBound(i uint) (int, bool) {
178 bit := i % bitsPerWord
180 if val := b.bits[w] >> bit; val != 0 {
181 return b.offset + int(i) + ntz(val), true
184 for w++; w < wordsPerBlock; w++ {
185 if val := b.bits[w]; val != 0 {
186 return b.offset + int(w*bitsPerWord) + ntz(val), true
193 // forEach calls f for each element of block b.
194 // f must not mutate b's enclosing Sparse.
195 func (b *block) forEach(f func(int)) {
196 for i, w := range b.bits {
197 offset := b.offset + i*bitsPerWord
198 for bi := 0; w != 0 && bi < bitsPerWord; bi++ {
208 // offsetAndBitIndex returns the offset of the block that would
209 // contain x and the bit index of x within that block.
211 func offsetAndBitIndex(x int) (int, uint) {
212 mod := x % bitsPerBlock
214 // Euclidean (non-negative) remainder
217 return x - mod, uint(mod)
220 // -- Sparse --------------------------------------------------------------
222 // none is a shared, empty, sentinel block that indicates the end of a block
226 // Dummy type used to generate an implicit panic. This must be defined at the
227 // package level; if it is defined inside a function, it prevents the inlining
229 type to_copy_a_sparse_you_must_call_its_Copy_method struct{}
231 // init ensures s is properly initialized.
232 func (s *Sparse) init() {
234 if root.next == nil {
238 } else if root.next.prev != root {
239 // Copying a Sparse x leads to pernicious corruption: the
240 // new Sparse y shares the old linked list, but iteration
241 // on y will never encounter &y.root so it goes into a
242 // loop. Fail fast before this occurs.
243 // We don't want to call panic here because it prevents the
244 // inlining of this function.
245 _ = (interface{}(nil)).(to_copy_a_sparse_you_must_call_its_Copy_method)
249 func (s *Sparse) first() *block {
251 if s.root.offset == MaxInt {
257 // next returns the next block in the list, or end if b is the last block.
258 func (s *Sparse) next(b *block) *block {
259 if b.next == &s.root {
265 // prev returns the previous block in the list, or end if b is the first block.
266 func (s *Sparse) prev(b *block) *block {
267 if b.prev == &s.root {
273 // IsEmpty reports whether the set s is empty.
274 func (s *Sparse) IsEmpty() bool {
275 return s.root.next == nil || s.root.offset == MaxInt
278 // Len returns the number of elements in the set s.
279 func (s *Sparse) Len() int {
281 for b := s.first(); b != &none; b = s.next(b) {
287 // Max returns the maximum element of the set s, or MinInt if s is empty.
288 func (s *Sparse) Max() int {
292 return s.root.prev.max()
295 // Min returns the minimum element of the set s, or MaxInt if s is empty.
296 func (s *Sparse) Min() int {
300 return s.root.min(false)
303 // LowerBound returns the smallest element >= x, or MaxInt if there is no such
305 func (s *Sparse) LowerBound(x int) int {
306 offset, i := offsetAndBitIndex(x)
307 for b := s.first(); b != &none; b = s.next(b) {
308 if b.offset > offset {
311 if b.offset == offset {
312 if y, ok := b.lowerBound(i); ok {
320 // block returns the block that would contain offset,
321 // or nil if s contains no such block.
322 // Precondition: offset is a multiple of bitsPerBlock.
323 func (s *Sparse) block(offset int) *block {
324 for b := s.first(); b != &none && b.offset <= offset; b = s.next(b) {
325 if b.offset == offset {
332 // Insert adds x to the set s, and reports whether the set grew.
333 func (s *Sparse) Insert(x int) bool {
334 offset, i := offsetAndBitIndex(x)
337 for ; b != &none && b.offset <= offset; b = s.next(b) {
338 if b.offset == offset {
343 // Insert new block before b.
344 new := s.insertBlockBefore(b)
349 // removeBlock removes a block and returns the block that followed it (or end if
350 // it was the last block).
351 func (s *Sparse) removeBlock(b *block) *block {
355 if b.next == &s.root {
362 if first == &s.root {
363 // This was the only block.
367 s.root.offset = first.offset
368 s.root.bits = first.bits
369 if first.next == &s.root {
370 // Single block remaining.
371 s.root.next = &s.root
372 s.root.prev = &s.root
374 s.root.next = first.next
375 first.next.prev = &s.root
380 // Remove removes x from the set s, and reports whether the set shrank.
381 func (s *Sparse) Remove(x int) bool {
382 offset, i := offsetAndBitIndex(x)
383 if b := s.block(offset); b != nil {
395 // Clear removes all elements from the set s.
396 func (s *Sparse) Clear() {
404 // If set s is non-empty, TakeMin sets *p to the minimum element of
405 // the set s, removes that element from the set and returns true.
406 // Otherwise, it returns false and *p is undefined.
408 // This method may be used for iteration over a worklist like so:
411 // for worklist.TakeMin(&x) { use(x) }
413 func (s *Sparse) TakeMin(p *int) bool {
417 *p = s.root.min(true)
419 s.removeBlock(&s.root)
424 // Has reports whether x is an element of the set s.
425 func (s *Sparse) Has(x int) bool {
426 offset, i := offsetAndBitIndex(x)
427 if b := s.block(offset); b != nil {
433 // forEach applies function f to each element of the set s in order.
435 // f must not mutate s. Consequently, forEach is not safe to expose
436 // to clients. In any case, using "range s.AppendTo()" allows more
437 // natural control flow with continue/break/return.
439 func (s *Sparse) forEach(f func(int)) {
440 for b := s.first(); b != &none; b = s.next(b) {
445 // Copy sets s to the value of x.
446 func (s *Sparse) Copy(x *Sparse) {
455 sb = s.insertBlockBefore(sb)
457 sb.offset = xb.offset
465 // insertBlockBefore returns a new block, inserting it before next.
466 // If next is the root, the root is replaced. If next is end, the block is
467 // inserted at the end.
468 func (s *Sparse) insertBlockBefore(next *block) *block {
471 panic("BUG: passed block with empty set")
477 // Special case: we need to create a new block that will become the root
478 // block.The old root block becomes the second block.
483 if second.next == &s.root {
484 s.root.prev = &second
486 s.root.prev = second.prev
487 second.next.prev = &second
488 second.prev = &s.root
493 // Insert before root.
504 // discardTail removes block b and all its successors from s.
505 func (s *Sparse) discardTail(b *block) {
510 b.prev.next = &s.root
516 // IntersectionWith sets s to the intersection s ∩ x.
517 func (s *Sparse) IntersectionWith(x *Sparse) {
524 for xb != &none && sb != &none {
526 case xb.offset < sb.offset:
529 case xb.offset > sb.offset:
530 sb = s.removeBlock(sb)
534 for i := range sb.bits {
535 r := xb.bits[i] & sb.bits[i]
542 // sb will be overwritten or removed
552 // Intersection sets s to the intersection x ∩ y.
553 func (s *Sparse) Intersection(x, y *Sparse) {
556 s.IntersectionWith(y)
559 s.IntersectionWith(x)
569 for xb != &none && yb != &none {
571 case xb.offset < yb.offset:
574 case xb.offset > yb.offset:
580 sb = s.insertBlockBefore(sb)
582 sb.offset = xb.offset
585 for i := range sb.bits {
586 r := xb.bits[i] & yb.bits[i]
593 // sb will be overwritten or removed
603 // Intersects reports whether s ∩ x ≠ ∅.
604 func (s *Sparse) Intersects(x *Sparse) bool {
607 for sb != &none && xb != &none {
609 case xb.offset < sb.offset:
611 case xb.offset > sb.offset:
614 for i := range sb.bits {
615 if sb.bits[i]&xb.bits[i] != 0 {
626 // UnionWith sets s to the union s ∪ x, and reports whether s grew.
627 func (s *Sparse) UnionWith(x *Sparse) bool {
636 if sb != &none && sb.offset == xb.offset {
637 for i := range xb.bits {
638 if sb.bits[i] != xb.bits[i] {
639 sb.bits[i] |= xb.bits[i]
644 } else if sb == &none || sb.offset > xb.offset {
645 sb = s.insertBlockBefore(sb)
646 sb.offset = xb.offset
657 // Union sets s to the union x ∪ y.
658 func (s *Sparse) Union(x, y *Sparse) {
674 for xb != &none || yb != &none {
676 sb = s.insertBlockBefore(sb)
679 case yb == &none || (xb != &none && xb.offset < yb.offset):
680 sb.offset = xb.offset
684 case xb == &none || (yb != &none && yb.offset < xb.offset):
685 sb.offset = yb.offset
690 sb.offset = xb.offset
691 for i := range xb.bits {
692 sb.bits[i] = xb.bits[i] | yb.bits[i]
703 // DifferenceWith sets s to the difference s ∖ x.
704 func (s *Sparse) DifferenceWith(x *Sparse) {
712 for xb != &none && sb != &none {
714 case xb.offset > sb.offset:
717 case xb.offset < sb.offset:
722 for i := range sb.bits {
723 r := sb.bits[i] & ^xb.bits[i]
728 sb = s.removeBlock(sb)
737 // Difference sets s to the difference x ∖ y.
738 func (s *Sparse) Difference(x, y *Sparse) {
756 for xb != &none && yb != &none {
757 if xb.offset > yb.offset {
758 // y has block, x has &none
764 sb = s.insertBlockBefore(sb)
766 sb.offset = xb.offset
769 case xb.offset < yb.offset:
770 // x has block, y has &none
776 // x and y have corresponding blocks
778 for i := range sb.bits {
779 r := xb.bits[i] & ^yb.bits[i]
786 // sb will be overwritten or removed
796 sb = s.insertBlockBefore(sb)
798 sb.offset = xb.offset
808 // SymmetricDifferenceWith sets s to the symmetric difference s ∆ x.
809 func (s *Sparse) SymmetricDifferenceWith(x *Sparse) {
817 for xb != &none && sb != &none {
819 case sb.offset < xb.offset:
821 case xb.offset < sb.offset:
822 nb := s.insertBlockBefore(sb)
823 nb.offset = xb.offset
828 for i := range sb.bits {
829 r := sb.bits[i] ^ xb.bits[i]
834 sb = s.removeBlock(sb)
842 for xb != &none { // append the tail of x to s
843 sb = s.insertBlockBefore(sb)
844 sb.offset = xb.offset
851 // SymmetricDifference sets s to the symmetric difference x ∆ y.
852 func (s *Sparse) SymmetricDifference(x, y *Sparse) {
858 s.SymmetricDifferenceWith(y)
861 s.SymmetricDifferenceWith(x)
868 for xb != &none && yb != &none {
870 sb = s.insertBlockBefore(sb)
873 case yb.offset < xb.offset:
874 sb.offset = yb.offset
878 case xb.offset < yb.offset:
879 sb.offset = xb.offset
885 for i := range sb.bits {
886 r := xb.bits[i] ^ yb.bits[i]
891 sb.offset = xb.offset
899 for xb != &none { // append the tail of x to s
901 sb = s.insertBlockBefore(sb)
903 sb.offset = xb.offset
909 for yb != &none { // append the tail of y to s
911 sb = s.insertBlockBefore(sb)
913 sb.offset = yb.offset
922 // SubsetOf reports whether s ∖ x = ∅.
923 func (s *Sparse) SubsetOf(x *Sparse) bool {
932 case xb == &none || xb.offset > sb.offset:
934 case xb.offset < sb.offset:
937 for i := range sb.bits {
938 if sb.bits[i]&^xb.bits[i] != 0 {
949 // Equals reports whether the sets s and t have the same elements.
950 func (s *Sparse) Equals(t *Sparse) bool {
958 case sb == &none && tb == &none:
960 case sb == &none || tb == &none:
962 case sb.offset != tb.offset:
964 case sb.bits != tb.bits:
973 // String returns a human-readable description of the set s.
974 func (s *Sparse) String() string {
977 s.forEach(func(x int) {
981 fmt.Fprintf(&buf, "%d", x)
987 // BitString returns the set as a string of 1s and 0s denoting the sum
988 // of the i'th powers of 2, for each i in s. A radix point, always
989 // preceded by a digit, appears if the sum is non-integral.
992 // {}.BitString() = "0"
993 // {4,5}.BitString() = "110000"
994 // {-3}.BitString() = "0.001"
995 // {-3,0,4,5}.BitString() = "110001.001"
997 func (s *Sparse) BitString() string {
1002 min, max := s.Min(), s.Max()
1007 nbytes++ // zero bit
1010 nbytes += len(".") - min
1013 b := make([]byte, nbytes)
1020 s.forEach(func(x int) {
1029 // GoString returns a string showing the internal representation of
1032 func (s *Sparse) GoString() string {
1033 var buf bytes.Buffer
1034 for b := s.first(); b != &none; b = s.next(b) {
1035 fmt.Fprintf(&buf, "block %p {offset=%d next=%p prev=%p",
1036 b, b.offset, b.next, b.prev)
1037 for _, w := range b.bits {
1038 fmt.Fprintf(&buf, " 0%016x", w)
1040 fmt.Fprintf(&buf, "}\n")
1045 // AppendTo returns the result of appending the elements of s to slice
1047 func (s *Sparse) AppendTo(slice []int) []int {
1048 s.forEach(func(x int) {
1049 slice = append(slice, x)
1054 // -- Testing/debugging ------------------------------------------------
1056 // check returns an error if the representation invariants of s are violated.
1057 func (s *Sparse) check() error {
1060 // An empty set must have only the root block with offset MaxInt.
1061 if s.root.next != &s.root {
1062 return fmt.Errorf("multiple blocks with empty root block")
1064 if s.root.offset != MaxInt {
1065 return fmt.Errorf("empty set has offset %d, should be MaxInt", s.root.offset)
1069 for b := s.first(); ; b = s.next(b) {
1070 if b.offset%bitsPerBlock != 0 {
1071 return fmt.Errorf("bad offset modulo: %d", b.offset)
1074 return fmt.Errorf("empty block")
1076 if b.prev.next != b {
1077 return fmt.Errorf("bad prev.next link")
1079 if b.next.prev != b {
1080 return fmt.Errorf("bad next.prev link")
1082 if b.next == &s.root {
1085 if b.offset >= b.next.offset {
1086 return fmt.Errorf("bad offset order: b.offset=%d, b.next.offset=%d",
1087 b.offset, b.next.offset)