Skip to content

Commit

Permalink
use segregated hashmap to boost the freelist allocate and release per…
Browse files Browse the repository at this point in the history
…formance
  • Loading branch information
WIZARD-CXY committed Jan 22, 2019
1 parent f0ad07c commit ed79fac
Show file tree
Hide file tree
Showing 6 changed files with 456 additions and 59 deletions.
14 changes: 11 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)"
default: build

race:
@go test -v -race -test.run="TestSimulate_(100op|1000op)"
@TEST_ARRAY_FREELIST=n go test -v -race -test.run="TestSimulate_(100op|1000op)"
@echo "array freelist test"
@TEST_ARRAY_FREELIST=y go test -v -race -test.run="TestSimulate_(100op|1000op)"

fmt:
!(gofmt -l -s -d $(shell find . -name \*.go) | grep '[a-z]')
Expand All @@ -23,8 +25,14 @@ errcheck:
@errcheck -ignorepkg=bytes -ignore=os:Remove go.etcd.io/bbolt

test:
go test -timeout 20m -v -coverprofile cover.out -covermode atomic
TEST_ARRAY_FREELIST=n go test -timeout 20m -v -coverprofile cover.out -covermode atomic
# Note: gets "program not an importable package" in out of path builds
go test -v ./cmd/bbolt
TEST_ARRAY_FREELIST=n go test -v ./cmd/bbolt

@echo "array freelist test"

@TEST_ARRAY_FREELIST=y go test -timeout 20m -v -coverprofile cover.out -covermode atomic
# Note: gets "program not an importable package" in out of path builds
@TEST_ARRAY_FREELIST=y go test -v ./cmd/bbolt

.PHONY: race fmt errcheck test gosimple unused
2 changes: 1 addition & 1 deletion allocate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
)

func TestTx_allocatePageStats(t *testing.T) {
f := newFreelist()
f := newTypedFreelist()
ids := []pgid{2, 3}
f.readIDs(ids)

Expand Down
31 changes: 27 additions & 4 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ var defaultPageSize = os.Getpagesize()
// The time elapsed between consecutive file locking attempts.
const flockRetryTimeout = 50 * time.Millisecond

// freelistType names the data structure that backs the freelist.
type freelistType string

// Supported freelist backends.
const (
	// ArrayType selects the array-backed freelist.
	ArrayType freelistType = "array"
	// HashMapType selects the hashmap-backed freelist.
	HashMapType freelistType = "hashmap"
)

// DB represents a collection of buckets persisted to a file on disk.
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
Expand Down Expand Up @@ -70,6 +79,12 @@ type DB struct {
// re-sync during recovery.
NoFreelistSync bool

// FreelistType sets the backend freelist type. There are two options. Array, which is simple but suffers
// dramatic performance degradation if the database is large and fragmentation in the freelist is common.
// The alternative is hashmap, which is faster in almost all circumstances
// but doesn't guarantee that it offers the smallest page id available. In the normal case it is safe.
FreelistType freelistType

// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
Expand Down Expand Up @@ -169,6 +184,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
db.NoGrowSync = options.NoGrowSync
db.MmapFlags = options.MmapFlags
db.NoFreelistSync = options.NoFreelistSync
db.FreelistType = options.FreelistType

// Set default values for later DB operations.
db.MaxBatchSize = DefaultMaxBatchSize
Expand Down Expand Up @@ -283,15 +299,15 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
// concurrent accesses being made to the freelist.
func (db *DB) loadFreelist() {
db.freelistLoad.Do(func() {
db.freelist = newFreelist()
db.freelist = newFreelist(db.FreelistType)
if !db.hasSyncedFreelist() {
// Reconstruct free list by scanning the DB.
db.freelist.readIDs(db.freepages())
} else {
// Read free list from freelist page.
db.freelist.read(db.page(db.meta().freelist))
}
db.stats.FreePageN = len(db.freelist.getFreePageIDs())
db.stats.FreePageN = db.freelist.free_count()
})
}

Expand Down Expand Up @@ -1005,6 +1021,12 @@ type Options struct {
// under normal operation, but requires a full database re-sync during recovery.
NoFreelistSync bool

// FreelistType sets the backend freelist type. There are two options. Array, which is simple but suffers
// dramatic performance degradation if the database is large and fragmentation in the freelist is common.
// The alternative is hashmap, which is faster in almost all circumstances
// but doesn't guarantee that it offers the smallest page id available. In the normal case it is safe.
FreelistType freelistType

// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool
Expand Down Expand Up @@ -1034,8 +1056,9 @@ type Options struct {
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
var DefaultOptions = &Options{
Timeout: 0,
NoGrowSync: false,
Timeout: 0,
NoGrowSync: false,
FreelistType: ArrayType,
}

// Stats represents statistics about the database.
Expand Down
126 changes: 87 additions & 39 deletions freelist.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ package bbolt

import (
"fmt"
"os"
"sort"
"unsafe"
)

const testArrayFreelist = "TEST_ARRAY_FREELIST"

// txPending holds a list of pgids and corresponding allocation txns
// that are pending to be freed.
type txPending struct {
Expand All @@ -14,22 +17,62 @@ type txPending struct {
lastReleaseBegin txid // beginning txid of last matching releaseRange
}

// pidSet is a set of starting pgids whose free spans all have the same size.
// It is a map with empty struct values, so only the keys carry information.
type pidSet map[pgid]struct{}

// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
ids []pgid // all free and available free page ids.
allocs map[pgid]txid // mapping of txid that allocated a pgid.
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[pgid]bool // fast lookup of all free and pending page ids.
freelistType freelistType // freelist type
ids []pgid // all free and available free page ids.
allocs map[pgid]txid // mapping of txid that allocated a pgid.
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[pgid]bool // fast lookup of all free and pending page ids.
freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
forwardMap map[pgid]uint64 // key is start pgid, value is its span size
backwardMap map[pgid]uint64 // key is end pgid, value is its span size
allocate func(txid txid, n int) pgid // the freelist allocate func
free_count func() int // the function which gives you free page number
mergeSpans func(ids pgids) // the mergeSpan func
getFreePageIDs func() []pgid // get free pgids func
readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist
}

// newFreelist returns an empty, initialized freelist.
func newFreelist() *freelist {
return &freelist{
allocs: make(map[pgid]txid),
pending: make(map[txid]*txPending),
cache: make(map[pgid]bool),
func newFreelist(freelistType freelistType) *freelist {
if env := os.Getenv(testArrayFreelist); env != "" {
if env == "y" {
freelistType = ArrayType
} else {
freelistType = HashMapType
}
}

f := &freelist{
freelistType: freelistType,
allocs: make(map[pgid]txid),
pending: make(map[txid]*txPending),
cache: make(map[pgid]bool),
freemaps: make(map[uint64]pidSet),
forwardMap: make(map[pgid]uint64),
backwardMap: make(map[pgid]uint64),
}

if freelistType == HashMapType {
f.allocate = f.hashmapAllocate
f.free_count = f.hashmapFree_count
f.mergeSpans = f.hashmapMergeSpans
f.getFreePageIDs = f.hashmapGetFreePageIDs
f.readIDs = f.hashmapReadIDs
} else {
f.allocate = f.arrayAllocate
f.free_count = f.arrayFree_count
f.mergeSpans = f.arrayMergeSpans
f.getFreePageIDs = f.arrayGetFreePageIDs
f.readIDs = f.arrayReadIDs
}

return f
}

// size returns the size of the page after serialization.
Expand All @@ -47,8 +90,8 @@ func (f *freelist) count() int {
return f.free_count() + f.pending_count()
}

// free_count returns count of free pages
func (f *freelist) free_count() int {
// arrayFree_count returns the number of free pages for the array-backed
// freelist, which is the length of f.ids.
func (f *freelist) arrayFree_count() int {
return len(f.ids)
}

Expand All @@ -72,9 +115,9 @@ func (f *freelist) copyall(dst []pgid) {
mergepgids(dst, f.getFreePageIDs(), m)
}

// allocate returns the starting page id of a contiguous list of pages of a given size.
// arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
// If a contiguous block cannot be found then 0 is returned.
func (f *freelist) allocate(txid txid, n int) pgid {
func (f *freelist) arrayAllocate(txid txid, n int) pgid {
if len(f.ids) == 0 {
return 0
}
Expand Down Expand Up @@ -160,8 +203,7 @@ func (f *freelist) release(txid txid) {
delete(f.pending, tid)
}
}
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
f.mergeSpans(m)
}

// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
Expand Down Expand Up @@ -194,8 +236,7 @@ func (f *freelist) releaseRange(begin, end txid) {
delete(f.pending, tid)
}
}
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
f.mergeSpans(m)
}

// rollback removes the pages from a given pending tx.
Expand All @@ -222,8 +263,7 @@ func (f *freelist) rollback(txid txid) {
}
// Remove pages from pending list and mark as free if allocated by txid.
delete(f.pending, txid)
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
f.mergeSpans(m)
}

// freed returns whether a given page is in the free list.
Expand All @@ -249,24 +289,39 @@ func (f *freelist) read(p *page) {
f.ids = nil
} else {
ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
f.ids = make([]pgid, len(ids))
copy(f.ids, ids)
f.readIDs(ids)
}
}

// Make sure they're sorted.
sort.Sort(pgids(f.ids))
// arrayReadIDs initializes the array-backed freelist from a given list of ids.
//
// The ids are copied into a fresh slice, so the caller's slice is neither
// retained nor reordered, and the copy is sorted before the page cache is
// rebuilt.
//
// An empty or nil list resets f.ids to nil and still rebuilds the cache.
// Returning early in that case would leave ids from an earlier call in place
// whenever readIDs runs again on an already-populated freelist.
func (f *freelist) arrayReadIDs(ids []pgid) {
	if len(ids) == 0 {
		f.ids = nil
	} else {
		f.ids = make([]pgid, len(ids))
		copy(f.ids, ids)
		// Make sure they're sorted.
		sort.Sort(pgids(f.ids))
	}

	// Rebuild the page cache.
	f.reindex()
}

// readIDs initializes the freelist from a given list of ids.
func (f *freelist) readIDs(ids []pgid) {
f.ids = ids
f.reindex()
// reindex discards the current cache and repopulates it with every free page
// id reported by getFreePageIDs plus every page id listed in a pending tx.
func (f *freelist) reindex() {
	free := f.getFreePageIDs()

	// Install the fresh map first, then fill it through a local alias.
	f.cache = make(map[pgid]bool, len(free))
	index := f.cache

	for _, freeID := range free {
		index[freeID] = true
	}
	for _, pend := range f.pending {
		for _, id := range pend.ids {
			index[id] = true
		}
	}
}

func (f *freelist) getFreePageIDs() []pgid {
// arrayGetFreePageIDs returns the free page ids of the array-backed freelist.
// It returns f.ids itself, not a copy.
func (f *freelist) arrayGetFreePageIDs() []pgid {
return f.ids
}

Expand Down Expand Up @@ -320,15 +375,8 @@ func (f *freelist) reload(p *page) {
f.readIDs(a)
}

// reindex rebuilds the free cache based on available and pending free lists.
func (f *freelist) reindex() {
f.cache = make(map[pgid]bool, len(f.getFreePageIDs()))
for _, id := range f.getFreePageIDs() {
f.cache[id] = true
}
for _, txp := range f.pending {
for _, pendingID := range txp.ids {
f.cache[pendingID] = true
}
}
// arrayMergeSpans merges the given page ids into the array-backed free list.
// ids is sorted in place first; the result of merging it with the current
// f.ids then becomes the new f.ids.
func (f *freelist) arrayMergeSpans(ids pgids) {
sort.Sort(ids)
f.ids = pgids(f.ids).merge(ids)
}
Loading

0 comments on commit ed79fac

Please sign in to comment.