Skip to content

Commit

Permalink
Update handling of files without Size attribute
Browse files Browse the repository at this point in the history
Clarifies difference in how files with and without Size attributes work
by defining them as file-like and non-file-like and describing what that
means. Primarily enables interaction with files where read and write are
not symmetric.

Signed-off-by: Michael Smith <[email protected]>
  • Loading branch information
MikaelSmith committed Dec 20, 2019
1 parent 8a4d858 commit 8fc111e
Show file tree
Hide file tree
Showing 8 changed files with 409 additions and 148 deletions.
4 changes: 4 additions & 0 deletions docs/docs/external-plugins.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ where `Some content` is the entry's content.

When `write` is invoked, the script must read from `stdin` to get the content to write to the entry.

Wash distinguishes between two different patterns for things you can read and write. It considers a "file-like" entry to be one with a defined size (so the `size` attribute is set when listing the entry). Reading and writing a "file-like" entry edits the contents.

Something that can be read and written but doesn't define size has different characteristics. Reading and writing are not symmetrical: if you write to it then read from it, you may not see what you just wrote. So these non-file-like entries error if you try to open them with a ReadWrite handle.

### Examples

```
Expand Down
2 changes: 2 additions & 0 deletions docs/docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ echo "Hello, world!"
### write
The `write` action lets you write data to an entry. Thus, any command that writes a file also works with these entries.

Note that Wash distinguishes between file-like and non-file-like entries. An entry is file-like if it's readable and writable and defines its size; you can edit it like a file. If it doesn't define a size then it's non-file-like, and trying to open it with a ReadWrite handle will error; reads from it may not return data you previously wrote to it.

#### Examples
```
wash . ❯ echo 'exit 1' >> gcp/Wash/storage/some-wash-stuff/an\ example\ folder/static.sh
Expand Down
225 changes: 135 additions & 90 deletions fuse/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,28 @@ import (

// ==== FUSE file Interface ====

// file's implementation differs primarily on two axes: whether `Size` is set, and whether we're
// currently writing.
// - a *file-like* entry declares a `Size` in its `Attributes`
// - reads and writes are symmetric; the kernel page cache will be used, and the file size
// represents the current local state of the file
// - when writing, `data` represents the local state of the file; changes to its size will be
// reflected in `data` and reads will be served from it. The file's size will be `len(data)`.
// Unchanged sections will be filled from `plugin.Read` as needed.
// - when not writing, data is read directly from `plugin.Read`
// - a *non-file-like* entry has `Size` unset
// - read always pulls from `plugin.Read` and writes are buffered independently
// - `data` stores only the data to be written, and is not initialized from `plugin.Read`
// - the file's size will be reported as its readable size; it will not reflect calls to `write`
//
// Note that writes only result in calling `plugin.Write` when a file handle is closed. Writing
// with multiple handles will be protected by `mux`, but all writes will operate on the same `data`
// and the first handle close will trigger `plugin.Write`.
//
// `readSize` will always be initialized from either the `Size` attribute, or if unset then the
// length of data available to read.
//
// `writers` are used to track in-progress writes so we know when to `plugin.Write` on `Flush`
type file struct {
fuseNode

Expand All @@ -23,25 +45,33 @@ type file struct {
writers map[fuse.HandleID]struct{}
// Only valid if len(writers) > 0
data []byte
// Size for content during writing or with unspecified size attribute
size uint64
// Tracks whether we still need to query plugin.Size for the entry's size before using size
sizeValid bool
// Size of readable content, necessary for *non-file-like* entries
readSize uint64
}

func newFile(p *dir, e plugin.Entry) *file {
return &file{fuseNode: newFuseNode("f", p, e), writers: make(map[fuse.HandleID]struct{})}
}

func (f *file) isFileLike() bool {
attr := plugin.Attributes(f.entry)
return attr.HasSize()
}

// If currently writing a file-like object, we should use local state to fulfil many requests.
func (f *file) useLocalState() bool {
return len(f.writers) != 0 && f.isFileLike()
}

var _ = fs.Node(&file{})
var _ = fs.Handle(&file{})

func (f *file) Attr(ctx context.Context, a *fuse.Attr) error {
f.mux.Lock()
defer f.mux.Unlock()

if len(f.writers) == 0 {
// Fetch updated attributes.
if !f.useLocalState() {
// Fetch updated attributes only if we're not currently writing to it.
entry, err := f.refind(ctx)
if err != nil {
activity.Warnf(ctx, "FUSE: Attr errored %v, %v", f, err)
Expand All @@ -59,10 +89,10 @@ func (f *file) fillAttr(a *fuse.Attr) {
attr := plugin.Attributes(f.entry)
applyAttr(a, attr, getFileMode(f.entry))

if len(f.writers) != 0 || !attr.HasSize() {
if f.useLocalState() || !attr.HasSize() {
// Use whatever size we know locally. Retrieving content can be expensive so we settle for
// including size only when it's been retreived previously by opening the file.
a.Size = f.size
a.Size = f.readSize
}
}

Expand All @@ -86,20 +116,21 @@ var _ = fs.NodeOpener(&file{})
// - An entry that only supports Write can only be opened WriteOnly.
//
// When writing and flushing a file, we may call Read on the entry (if it supports Read) even if
// opened WriteOnly. That only happens when performing a partial write, or when its Size attribute
// isn't set and there are no calls to Setattr to define the file size.
// opened WriteOnly. That only happens when performing a partial write of a *file-like* entry.
func (f *file) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.OpenResponse) (fs.Handle, error) {
f.mux.Lock()
defer f.mux.Unlock()
activity.Record(ctx, "FUSE: Open %v: %+v", f, *req)

// Check for an updated entry in case it has static state.
entry, err := f.refind(ctx)
if err != nil {
activity.Warnf(ctx, "FUSE: Open errored %v, %v", f, err)
return nil, err
if !f.useLocalState() {
// Check for an updated entry in case it has static state.
entry, err := f.refind(ctx)
if err != nil {
activity.Warnf(ctx, "FUSE: Open errored %v, %v", f, err)
return nil, err
}
f.entry = entry
}
f.entry = entry

readable := plugin.ReadAction().IsSupportedOn(f.entry)
writable := plugin.WriteAction().IsSupportedOn(f.entry)
Expand All @@ -115,31 +146,44 @@ func (f *file) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.OpenR
return nil, fuse.ENOTSUP
}

if req.Flags.IsReadOnly() || req.Flags.IsReadWrite() {
if attr := plugin.Attributes(entry); !attr.HasSize() {
// The entry's content size is unknown so open the file in direct IO mode. This enables FUSE
// to still read the entry's content so that built-in tools like cat and grep still work.
resp.Flags |= fuse.OpenDirectIO
if !f.isFileLike() && req.Flags.IsReadWrite() {
// Error ReadWrite on non-file-like entries because it probably won't work well.
activity.Warnf(ctx, "FUSE: Open Read/Write is not supported on non-file-like entry %v", f)
return nil, fuse.ENOTSUP
}

// Also set the size for editors that won't read anything on an apparently empty file. This
// doesn't help with cat/grep because they check attributes before opening the file.
f.size, err = plugin.Size(ctx, entry)
if err != nil {
return nil, err
}
} else {
f.size = attr.Size()
}
if !f.isFileLike() {
// Open the file in direct IO mode to avoid the kernel page cache. This also enables FUSE to
// still read the entry's content so that built-in tools like cat and grep still work.
resp.Flags |= fuse.OpenDirectIO
}

f.sizeValid = true
} else if !readable {
// Reported size is irrelevant for write-only entries.
f.sizeValid = true
if f.isFileLike() || req.Flags.IsReadOnly() {
// Get the entry's readable size if we expect to do any reads or keep a local representation.
size, err := plugin.Size(ctx, f.entry)
if err != nil {
return nil, err
}
f.readSize = size
}

return f, nil
}

func (f *file) releaseWriter(handle fuse.HandleID) {
if _, ok := f.writers[handle]; ok {
delete(f.writers, handle)

if len(f.writers) == 0 {
// If we just released the last writer, release the data buffer to conserve memory and
// invalidate cache on the entry and its parent so we get updated content and size on the
// next request. Leave size for entries that don't set it.
f.data = nil
plugin.ClearCacheFor(plugin.ID(f.entry), true)
}
}
}

var _ = fs.HandleReleaser(&file{})

func (f *file) Release(ctx context.Context, req *fuse.ReleaseRequest) error {
Expand All @@ -155,17 +199,9 @@ func (f *file) Release(ctx context.Context, req *fuse.ReleaseRequest) error {
}
}

if _, ok := f.writers[req.Handle]; ok {
delete(f.writers, req.Handle)

if len(f.writers) == 0 {
// If we just released the last writer, release the data buffer to conserve memory and
// invalidate cache on the entry and its parent so we get updated content and size on the
// next request. Leave size so we can use it for future attribute requests.
f.data = nil
plugin.ClearCacheFor(plugin.ID(f.entry), true)
}
}
// Release writer and cleanup if all writers are released. Note that this is usually a noop for
// non-file-like entries, they will have released the writers immediately after `plugin.Write`.
f.releaseWriter(req.Handle)

activity.Record(ctx, "FUSE: Release %v: %+v", f, *req)
return nil
Expand All @@ -177,16 +213,16 @@ func (f *file) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadR
f.mux.Lock()
defer f.mux.Unlock()

if len(f.writers) == 0 {
if f.useLocalState() {
fuseutil.HandleRead(req, resp, f.data)
} else {
data, err := plugin.ReadWithAnalytics(ctx, f.entry, int64(req.Size), req.Offset)
if err != nil && err != io.EOF {
// If we don't ignore EOF, then cat will display an input/output error message
// for entries with unknown content size.
return err
}
resp.Data = data
} else {
fuseutil.HandleRead(req, resp, f.data)
}

activity.Record(ctx, "FUSE: Read %v/%v bytes starting at %v from %v", len(resp.Data), req.Size, req.Offset, f)
Expand All @@ -202,23 +238,26 @@ func (f *file) Write(ctx context.Context, req *fuse.WriteRequest, resp *fuse.Wri
// Ensure handle is in list of writers.
f.writers[req.Handle] = struct{}{}

// If starting write beyond the current length, read to fill it in.
if start := int64(len(f.data)); req.Offset > start {
data, err := f.load(ctx, start, req.Offset)
if err != nil {
return err
if f.isFileLike() {
// If starting write beyond the current length, read to fill it in.
if start := int64(len(f.data)); req.Offset > start {
data, err := f.load(ctx, start, req.Offset)
if err != nil {
return err
}
f.data = append(f.data, data...)
}
f.data = append(f.data, data...)
}

// Expand the buffer if necessary and update known size.
// Expand the buffer if necessary to store the write data.
newLen := req.Offset + int64(len(req.Data))
if newLen := int(newLen); newLen > len(f.data) {
f.data = append(f.data, make([]byte, newLen-len(f.data))...)
}
// Size is irrelevant for write-only entries.
if plugin.ReadAction().IsSupportedOn(f.entry) && f.size < uint64(newLen) {
f.size = uint64(newLen)

// If file-like, then update readable size to reflect the expanded buffer.
if f.isFileLike() && f.readSize < uint64(newLen) {
f.readSize = uint64(newLen)
}

resp.Size = copy(f.data[req.Offset:], req.Data)
Expand All @@ -244,44 +283,43 @@ func (f *file) Flush(ctx context.Context, req *fuse.FlushRequest) error {
defer f.mux.Unlock()
activity.Record(ctx, "FUSE: Flush %v: %+v", f, *req)

if _, ok := f.writers[req.Handle]; !ok {
return nil
}

// If this handle had an open writer, write current data.
if _, ok := f.writers[req.Handle]; ok {
dataLen := int64(len(f.data))
// Write-only entries don't track size.
if plugin.ReadAction().IsSupportedOn(f.entry) && uint64(dataLen) > f.size {
dataLen := int64(len(f.data))
if f.isFileLike() {
// Only file-like entries keep data and readSize in sync.
if uint64(dataLen) > f.readSize {
panic("Size was not kept up-to-date with changes to data.")
}

if !f.sizeValid {
// If this is a readable entry that's opened WriteOnly and hasn't been truncated, we haven't
// determined its size. Get it now so we can buffer as needed.
size, err := plugin.Size(ctx, f.entry)
if err != nil {
return err
}
f.size = size
f.sizeValid = true
}

if uint64(dataLen) < f.size {
if uint64(dataLen) < f.readSize {
// Missing some data, load the remainder before writing.
data, err := f.load(ctx, dataLen, int64(f.size))
data, err := f.load(ctx, dataLen, int64(f.readSize))
if err != nil && err != io.EOF {
return err
}
f.data = append(f.data, data...)

// If still too small then something increased the size beyond the original.
// Fill with null characters.
if sz := uint64(len(f.data)); sz < f.size {
f.data = append(f.data, make([]byte, f.size-sz)...)
if sz := uint64(len(f.data)); sz < f.readSize {
f.data = append(f.data, make([]byte, f.readSize-sz)...)
}
}
}

if err := plugin.WriteWithAnalytics(ctx, f.entry.(plugin.Writable), f.data); err != nil {
activity.Warnf(ctx, "FUSE: Error writing %v: %v", f, err)
return err
}
if err := plugin.WriteWithAnalytics(ctx, f.entry.(plugin.Writable), f.data); err != nil {
activity.Warnf(ctx, "FUSE: Error writing %v: %v", f, err)
return err
}

// Non-file-like entries start from scratch on each Write operation, and have their cache
// invalidated whenever we write to them because we can't accurately model their readable data.
if !f.isFileLike() {
f.releaseWriter(req.Handle)
}
return nil
}
Expand All @@ -294,20 +332,27 @@ func (f *file) Setattr(ctx context.Context, req *fuse.SetattrRequest, resp *fuse
activity.Record(ctx, "FUSE: Setattr[%v] %v: %+v", req.Handle, f, *req)

if req.Valid.Size() {
if req.Valid.Handle() {
// Ensure handle is in list of writers.
f.writers[req.Handle] = struct{}{}
} else {
if !req.Valid.Handle() {
// No guarantee we'll ever write the change. If this is ever necessary, we could update it
// to immediately do a plugin.Write.
return fuse.ENOTSUP
}

// Update known size. If the caller tries to increase the size of a Write-only entry, Flush
// will error because we won't be able to read to fill it in. We choose to error instead of
// filling with null data because there's no obvious use-case for supporting it.
f.size = req.Size
f.sizeValid = true
// Ensure handle is in list of writers.
f.writers[req.Handle] = struct{}{}

if f.isFileLike() {
// Update known size.
f.readSize = req.Size
} else {
// Non-file-like entries use `data` as a write buffer. There's nothing to fill in from, so
// just resize as necessary.
if curLen := uint64(len(f.data)); req.Size > curLen {
f.data = append(f.data, make([]byte, req.Size-uint64(len(f.data)))...)
} else if req.Size < curLen {
f.data = f.data[:req.Size]
}
}
}

f.fillAttr(&resp.Attr)
Expand Down
Loading

0 comments on commit 8fc111e

Please sign in to comment.