Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bugfix: add lock in boltdb #1286

Merged
merged 1 commit into from
May 10, 2018
Merged

Conversation

rudyfly
Copy link
Collaborator

@rudyfly rudyfly commented May 9, 2018

Ⅰ. Describe what this PR did

Add lock in boltdb, in order to avoid pouchd panic when put/get boltdb
objects concurrently.

Ⅱ. Does this pull request fix one issue?

use shell script will cause pouchd panic

#!/bin/bash

op=$1

for i in `seq 1 10`
do
	case $op in
		create)
			pouch run -d --name multi-create-$i -v /mnt/dir1 -v /mnt/dir2 -v /mnt/dir3 -v /mnt/dir4 -v /mnt/dir5 -v /mnt/dir6 -v /mnt/dir7 -v /mnt/dir8 -v /mnt/dir9 -v /mnt/dir10 registry.hub.docker.com/library/busybox:latest top &
			;;
		remove)
			pouch rm -vf multi-create-$i
			;;
		*)
			;;
	esac
done
# test.sh create

pouchd panic

unexpected fault address 0x7fc8fd450853
fatal error: fault
[signal SIGSEGV: segmentation violation code=0x1 addr=0x7fc8fd450853 pc=0xbf3adc]

goroutine 570 [running]:
runtime.throw(0x1b6bfeb, 0x5)
	/usr/local/go/src/runtime/panic.go:605 +0x95 fp=0xc4210d8548 sp=0xc4210d8528 pc=0xbc1275
runtime.sigpanic()
	/usr/local/go/src/runtime/signal_unix.go:374 +0x227 fp=0xc4210d8598 sp=0xc4210d8548 pc=0xbd8dc7
runtime.memmove(0xc420f23980, 0x7fc8fd450853, 0x40)
	/usr/local/go/src/runtime/memmove_amd64.s:184 +0x15c fp=0xc4210d85a0 sp=0xc4210d8598 pc=0xbf3adc
runtime.slicebytetostring(0x0, 0x7fc8fd450853, 0x40, 0x367, 0xc420f23980, 0x40)
	/usr/local/go/src/runtime/string.go:98 +0x79 fp=0xc4210d85d0 sp=0xc4210d85a0 pc=0xbdd829
encoding/json.(*decodeState).literalStore(0xc420f52ea0, 0x7fc8fd450852, 0x42, 0x368, 0x19270a0, 0xc420e8a240, 0x198, 0x185e900)
	/usr/local/go/src/encoding/json/decode.go:939 +0x1d16 fp=0xc4210d8940 sp=0xc4210d85d0 pc=0xc9ee36
encoding/json.(*decodeState).literal(0xc420f52ea0, 0x19270a0, 0xc420e8a240, 0x198)
	/usr/local/go/src/encoding/json/decode.go:802 +0xdb fp=0xc4210d8998 sp=0xc4210d8940 pc=0xc9ce4b
encoding/json.(*decodeState).value(0xc420f52ea0, 0x19270a0, 0xc420e8a240, 0x198)
	/usr/local/go/src/encoding/json/decode.go:408 +0x31e fp=0xc4210d8a18 sp=0xc4210d8998 pc=0xc99e8e
encoding/json.(*decodeState).object(0xc420f52ea0, 0x1b32c00, 0xc420e8a240, 0x16)
	/usr/local/go/src/encoding/json/decode.go:736 +0x1284 fp=0xc4210d8c68 sp=0xc4210d8a18 pc=0xc9c404
encoding/json.(*decodeState).value(0xc420f52ea0, 0x1b32c00, 0xc420e8a240, 0x16)
	/usr/local/go/src/encoding/json/decode.go:405 +0x2e4 fp=0xc4210d8ce8 sp=0xc4210d8c68 pc=0xc99e54
encoding/json.(*decodeState).unmarshal(0xc420f52ea0, 0x1b32c00, 0xc420e8a240, 0x0, 0x0)
	/usr/local/go/src/encoding/json/decode.go:187 +0x20e fp=0xc4210d8d60 sp=0xc4210d8ce8 pc=0xc992de
encoding/json.Unmarshal(0x7fc8fd45084a, 0x370, 0x370, 0x1b32c00, 0xc420e8a240, 0x7fc8fd45084a, 0x370)
	/usr/local/go/src/encoding/json/decode.go:107 +0x148 fp=0xc4210d8da8 sp=0xc4210d8d60 pc=0xc98ca8
github.com/alibaba/pouch/pkg/meta.(*Store).Get(0xc420419f20, 0xc420e49bc0, 0x40, 0xc420e3be00, 0x1, 0x0, 0xc4210d8eb0)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/pkg/meta/store.go:166 +0x10b fp=0xc4210d8e40 sp=0xc4210d8da8 pc=0x12cbb7b
github.com/alibaba/pouch/storage/volume.(*Core).GetVolume(0xc4201184d0, 0xc420e49bc0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1a7daa0, 0x1927001, ...)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/storage/volume/core.go:80 +0x9d fp=0xc4210d8f38 sp=0xc4210d8e40 pc=0x12f569d
github.com/alibaba/pouch/daemon/mgr.(*VolumeManager).Get(0xc4203100a0, 0x2506340, 0xc42044ff00, 0xc420e49bc0, 0x40, 0x0, 0xc420e50700, 0x7fc8fd5a2458)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/daemon/mgr/volume.go:137 +0xb5 fp=0xc4210d8ff8 sp=0xc4210d8f38 pc=0x17aabf5
github.com/alibaba/pouch/daemon/mgr.(*VolumeManager).Attach(0xc4203100a0, 0x2506340, 0xc42044ff00, 0xc420e49bc0, 0x40, 0xc420e1c510, 0x0, 0xc420964f90, 0x0)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/daemon/mgr/volume.go:161 +0xeb fp=0xc4210d9248 sp=0xc4210d8ff8 pc=0x17aaf6b
github.com/alibaba/pouch/daemon/mgr.(*ContainerManager).attachVolume(0xc42025eab0, 0x2506340, 0xc42044ff00, 0xc420e49bc0, 0x40, 0xc420ad9380, 0x0, 0x53, 0x1b6c220, 0x5, ...)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/daemon/mgr/container.go:1798 +0x489 fp=0xc4210d93a0 sp=0xc4210d9248 pc=0x1773989
github.com/alibaba/pouch/daemon/mgr.(*ContainerManager).getMountPointFromBinds(0xc42025eab0, 0x2506340, 0xc42044ff00, 0xc420ad9380, 0xc420907ef0, 0x0, 0x0)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/daemon/mgr/container.go:1915 +0xb54 fp=0xc4210d95d0 sp=0xc4210d93a0 pc=0x1774e04
github.com/alibaba/pouch/daemon/mgr.(*ContainerManager).generateMountPoints(0xc42025eab0, 0x2506340, 0xc42044ff00, 0xc420ad9380, 0x0, 0x0)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/daemon/mgr/container.go:1844 +0x1ed fp=0xc4210d9638 sp=0xc4210d95d0 pc=0x1773fcd
github.com/alibaba/pouch/daemon/mgr.(*ContainerManager).Create(0xc42025eab0, 0x2506340, 0xc42044ff00, 0xc42042d0e3, 0xe, 0xc420ad8b60, 0x0, 0x0, 0x0)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/daemon/mgr/container.go:454 +0xd6f fp=0xc4210d9928 sp=0xc4210d9638 pc=0x1766dbf
github.com/alibaba/pouch/apis/server.(*Server).createContainer(0xc4204504a0, 0x2506340, 0xc42044ff00, 0x2505140, 0xc4203ea000, 0xc4202f6600, 0x3, 0x3)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/apis/server/container_bridge.go:172 +0x2e9 fp=0xc4210d9a20 sp=0xc4210d9928 pc=0x17f5be9
github.com/alibaba/pouch/apis/server.(*Server).(github.com/alibaba/pouch/apis/server.createContainer)-fm(0x2506340, 0xc42044ff00, 0x2505140, 0xc4203ea000, 0xc4202f6600, 0xc4202f6600, 0xc4210e1ad0)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/apis/server/router.go:36 +0x5c fp=0xc4210d9a70 sp=0xc4210d9a20 pc=0x18021ac
github.com/alibaba/pouch/apis/server.filter.func1(0x2505140, 0xc4203ea000, 0xc4202f6600)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/apis/server/router.go:149 +0x327 fp=0xc4210d9bf0 sp=0xc4210d9a70 pc=0x18017f7
net/http.HandlerFunc.ServeHTTP(0xc420998f90, 0x2505140, 0xc4203ea000, 0xc4202f6600)
	/usr/local/go/src/net/http/server.go:1918 +0x44 fp=0xc4210d9c18 sp=0xc4210d9bf0 pc=0xe37c84
github.com/alibaba/pouch/vendor/github.com/gorilla/mux.(*Router).ServeHTTP(0xc42096f4a0, 0x2505140, 0xc4203ea000, 0xc4202f6600)
	/tmp/pouchbuild/src/github.com/alibaba/pouch/vendor/github.com/gorilla/mux/mux.go:133 +0xed fp=0xc4210d9d18 sp=0xc4210d9c18 pc=0x173a44d
net/http.serverHandler.ServeHTTP(0xc420469110, 0x2505140, 0xc4203ea000, 0xc4202f6400)
	/usr/local/go/src/net/http/server.go:2619 +0xb4 fp=0xc4210d9d48 sp=0xc4210d9d18 pc=0xe3a964
net/http.(*conn).serve(0xc420229ea0, 0x2506340, 0xc42044fd40)
	/usr/local/go/src/net/http/server.go:1801 +0x71d fp=0xc4210d9fc8 sp=0xc4210d9d48 pc=0xe36b5d
runtime.goexit()
	/usr/local/go/src/runtime/asm_amd64.s:2337 +0x1 fp=0xc4210d9fd0 sp=0xc4210d9fc8 pc=0xbf31d1
created by net/http.(*Server).Serve
	/usr/local/go/src/net/http/server.go:2720 +0x288

Ⅲ. Describe how you did it

Add lock when put/get boltdb

Ⅳ. Describe how to verify it

use shell script to check pouchd panic or not.

Ⅴ. Special notes for reviews

Signed-off-by: Rudy Zhang [email protected]

Add lock in boltdb, in order to avoid pouchd panic when put/get boltdb
objects concurrently.

Signed-off-by: Rudy Zhang <[email protected]>
@pouchrobot pouchrobot added kind/bug This is bug report for project size/S labels May 9, 2018
@codecov-io
Copy link

Codecov Report

Merging #1286 into master will increase coverage by 0.31%.
The diff coverage is 100%.

Impacted file tree graph

@@            Coverage Diff             @@
##           master    #1286      +/-   ##
==========================================
+ Coverage   15.31%   15.63%   +0.31%     
==========================================
  Files         174      173       -1     
  Lines       11014    10870     -144     
==========================================
+ Hits         1687     1699      +12     
+ Misses       9208     9052     -156     
  Partials      119      119
Impacted Files Coverage Δ
pkg/meta/boltdb.go 66.26% <100%> (+5.7%) ⬆️
daemon/config/config.go 7.89% <0%> (ø) ⬆️
ctrd/client.go 0% <0%> (ø) ⬆️
daemon/mgr/system.go 0% <0%> (ø) ⬆️
ctrd/client_opts.go
pkg/utils/utils.go 77.03% <0%> (+7.23%) ⬆️

@@ -15,6 +16,7 @@ func init() {

type bolt struct {
db *boltdb.DB
sync.Mutex
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering if we should add this sync.Mutex, since I found that in the definition of bolt.DB, there are lock as well. Does it conflict? Please check for this.

type DB struct {
	// When enabled, the database will perform a Check() after every commit.
	// A panic is issued if the database is in an inconsistent state. This
	// flag has a large performance impact so it should only be used for
	// debugging purposes.
	StrictMode bool

	// Setting the NoSync flag will cause the database to skip fsync()
	// calls after each commit. This can be useful when bulk loading data
	// into a database and you can restart the bulk load in the event of
	// a system failure or database corruption. Do not set this flag for
	// normal use.
	//
	// If the package global IgnoreNoSync constant is true, this value is
	// ignored.  See the comment on that constant for more details.
	//
	// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
	NoSync bool

	// When true, skips the truncate call when growing the database.
	// Setting this to true is only safe on non-ext3/ext4 systems.
	// Skipping truncation avoids preallocation of hard drive space and
	// bypasses a truncate() and fsync() syscall on remapping.
	//
	// https://github.com/boltdb/bolt/issues/284
	NoGrowSync bool

	// If you want to read the entire database fast, you can set MmapFlag to
	// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
	MmapFlags int

	// MaxBatchSize is the maximum size of a batch. Default value is
	// copied from DefaultMaxBatchSize in Open.
	//
	// If <=0, disables batching.
	//
	// Do not change concurrently with calls to Batch.
	MaxBatchSize int

	// MaxBatchDelay is the maximum delay before a batch starts.
	// Default value is copied from DefaultMaxBatchDelay in Open.
	//
	// If <=0, effectively disables batching.
	//
	// Do not change concurrently with calls to Batch.
	MaxBatchDelay time.Duration

	// AllocSize is the amount of space allocated when the database
	// needs to create new pages. This is done to amortize the cost
	// of truncate() and fsync() when growing the data file.
	AllocSize int

	path     string
	file     *os.File
	lockfile *os.File // windows only
	dataref  []byte   // mmap'ed readonly, write throws SEGV
	data     *[maxMapSize]byte
	datasz   int
	filesz   int // current on disk file size
	meta0    *meta
	meta1    *meta
	pageSize int
	opened   bool
	rwtx     *Tx
	txs      []*Tx
	freelist *freelist
	stats    Stats

	pagePool sync.Pool

	batchMu sync.Mutex
	batch   *batch

	rwlock   sync.Mutex   // Allows only one writer at a time.
	metalock sync.Mutex   // Protects meta page access.
	mmaplock sync.RWMutex // Protects mmap access during remapping.
	statlock sync.RWMutex // Protects stats access.

	ops struct {
		writeAt func(b []byte, off int64) (n int, err error)
	}

	// Read only mode.
	// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
	readOnly bool
}

@allencloud
Copy link
Collaborator

LGTM

@pouchrobot pouchrobot added the LGTM one maintainer or community participant agrees to merge the pull reuqest. label May 10, 2018
@allencloud allencloud merged commit 49302b7 into AliyunContainerService:master May 10, 2018
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
kind/bug This is bug report for project LGTM one maintainer or community participant agrees to merge the pull reuqest. size/S
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants