Skip to content

Commit

Permalink
feat(gateway): Gateway.FastDirIndexThreshold (ipfs#8853)
Browse files Browse the repository at this point in the history
* fix(core/gateway): option to limit directory size listing

* feat(gw): HTMLDirListingLimit

This is an alternative take on the way we limit the HTML listing output.
Instead of a hard cut-off, we list up to HTMLDirListingLimit items.
When a directory has more items than HTMLDirListingLimit, we show an
additional header and footer informing the user that only $HTMLDirListingLimit
items are listed. This is a better UX.

* fix: 0 disables Gateway.HTMLDirListingLimit

* refactor: Gateway.FastDirIndexThreshold

see explainer in docs/config.md

* refactor: preallocate slices

* docs: Gateway.FastDirIndexThreshold

* refactor: core/corehttp/gateway_handler.go

ipfs#8853 (comment)

* docs: apply suggestions from code review

Co-authored-by: Alan Shaw <[email protected]>

Co-authored-by: Marcin Rataj <[email protected]>
Co-authored-by: Alan Shaw <[email protected]>
  • Loading branch information
3 people authored Apr 28, 2022
1 parent 4f7d4bc commit 25cc85f
Show file tree
Hide file tree
Showing 12 changed files with 128 additions and 54 deletions.
2 changes: 1 addition & 1 deletion assets/dir-index-html/dir-index.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
<div class="menu-item-narrow"><a href="https://ipfs.io" target="_blank" rel="noopener noreferrer">About</a></div>
<div class="menu-item-narrow"><a href="https://ipfs.io#install" target="_blank" rel="noopener noreferrer">Install</a></div>
<div>
<a href="https://github.com/ipfs/dir-index-html/issues/" target="_blank" rel="noopener noreferrer">
<a href="https://github.com/ipfs/go-ipfs/issues/new/choose" target="_blank" rel="noopener noreferrer" title="Report a bug">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 18.4 21"><circle cx="7.5" cy="4.8" r="1"/><circle cx="11.1" cy="4.8" r="1"/><path d="M12.7 8.4c-0.5-1.5-1.9-2.5-3.5-2.5 -1.6 0-3 1-3.5 2.5H12.7z"/><path d="M8.5 9.7H5c-0.5 0.8-0.7 1.7-0.7 2.7 0 2.6 1.8 4.8 4.2 5.2V9.7z"/><path d="M13.4 9.7H9.9v7.9c2.4-0.4 4.2-2.5 4.2-5.2C14.1 11.4 13.9 10.5 13.4 9.7z"/><circle cx="15.7" cy="12.9" r="1"/><circle cx="15.1" cy="15.4" r="1"/><circle cx="15.3" cy="10.4" r="1"/><circle cx="2.7" cy="12.9" r="1"/><circle cx="3.3" cy="15.4" r="1"/><circle cx="3.1" cy="10.4" r="1"/></svg>
</a>
</div>
Expand Down
2 changes: 1 addition & 1 deletion assets/dir-index-html/src/dir-index.html
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
<div class="menu-item-narrow"><a href="https://ipfs.io" target="_blank" rel="noopener noreferrer">About</a></div>
<div class="menu-item-narrow"><a href="https://ipfs.io#install" target="_blank" rel="noopener noreferrer">Install</a></div>
<div>
<a href="https://github.com/ipfs/dir-index-html/issues/" target="_blank" rel="noopener noreferrer">
<a href="https://github.com/ipfs/go-ipfs/issues/new/choose" target="_blank" rel="noopener noreferrer" title="Report a bug">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 18.4 21"><circle cx="7.5" cy="4.8" r="1"/><circle cx="11.1" cy="4.8" r="1"/><path d="M12.7 8.4c-0.5-1.5-1.9-2.5-3.5-2.5 -1.6 0-3 1-3.5 2.5H12.7z"/><path d="M8.5 9.7H5c-0.5 0.8-0.7 1.7-0.7 2.7 0 2.6 1.8 4.8 4.2 5.2V9.7z"/><path d="M13.4 9.7H9.9v7.9c2.4-0.4 4.2-2.5 4.2-5.2C14.1 11.4 13.9 10.5 13.4 9.7z"/><circle cx="15.7" cy="12.9" r="1"/><circle cx="15.1" cy="15.4" r="1"/><circle cx="15.3" cy="10.4" r="1"/><circle cx="2.7" cy="12.9" r="1"/><circle cx="3.3" cy="15.4" r="1"/><circle cx="3.1" cy="10.4" r="1"/></svg>
</a>
</div>
Expand Down
17 changes: 9 additions & 8 deletions assets/dir-index-html/test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ const templateFile = "../dir-index.html"

// Copied from go-ipfs/core/corehttp/gateway_indexPage.go
// listingTemplateData is the input passed to the dir-index.html template.
// It is kept in step with the struct of the same name in
// go-ipfs/core/corehttp/gateway_indexPage.go.
type listingTemplateData struct {
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
// FastDirIndexThreshold mirrors the field added to the gateway's copy of
// this struct so the template can be rendered with the same data shape.
FastDirIndexThreshold int
}

type directoryItem struct {
Expand Down
10 changes: 9 additions & 1 deletion config/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,15 @@ type Gateway struct {
// }
PathPrefixes []string

// FIXME: Not yet implemented
// FastDirIndexThreshold is the maximum number of items in a directory
// before the Gateway switches to a shallow, faster listing which only
// requires the root node. This allows for listing big directories fast,
// without the linear slowdown caused by reading size metadata from child
// nodes.
// Setting to 0 will enable fast listings for all directories.
FastDirIndexThreshold *OptionalInteger `json:",omitempty"`

// FIXME: Not yet implemented: https://github.com/ipfs/go-ipfs/issues/8059
APICommands []string

// NoFetch configures the gateway to _not_ fetch blocks in response to
Expand Down
2 changes: 1 addition & 1 deletion core/coreapi/unixfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ func (api *UnixfsAPI) processLink(ctx context.Context, linkres ft.LinkResult, se
}

func (api *UnixfsAPI) lsFromLinksAsync(ctx context.Context, dir uio.Directory, settings *options.UnixfsLsSettings) (<-chan coreiface.DirEntry, error) {
out := make(chan coreiface.DirEntry)
out := make(chan coreiface.DirEntry, uio.DefaultShardWidth)

go func() {
defer close(out)
Expand Down
14 changes: 8 additions & 6 deletions core/corehttp/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ import (
)

// GatewayConfig holds the settings handed to newGatewayHandler when the
// gateway HTTP handler is constructed.
type GatewayConfig struct {
Headers map[string][]string
Writable bool
PathPrefixes []string
Headers map[string][]string
Writable bool
PathPrefixes []string
// FastDirIndexThreshold is compared against the number of entries in a
// directory when rendering its listing: child sizes are only fetched for
// directories with fewer entries than this value. It is populated from
// Gateway.FastDirIndexThreshold, falling back to 100 when unset.
FastDirIndexThreshold int
}

// A helper function to clean up a set of headers:
Expand Down Expand Up @@ -89,9 +90,10 @@ func GatewayOption(writable bool, paths ...string) ServeOption {
}, headers[ACEHeadersName]...))

var gateway http.Handler = newGatewayHandler(GatewayConfig{
Headers: headers,
Writable: writable,
PathPrefixes: cfg.Gateway.PathPrefixes,
Headers: headers,
Writable: writable,
PathPrefixes: cfg.Gateway.PathPrefixes,
FastDirIndexThreshold: int(cfg.Gateway.FastDirIndexThreshold.WithDefault(100)),
}, api)

gateway = otelhttp.NewHandler(gateway, "Gateway.Request")
Expand Down
2 changes: 2 additions & 0 deletions core/corehttp/gateway_handler_unixfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
func (i *gatewayHandler) serveUnixFS(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) {
ctx, span := tracing.Span(ctx, "Gateway", "ServeUnixFS", trace.WithAttributes(attribute.String("path", resolvedPath.String())))
defer span.End()

// Handling UnixFS
dr, err := i.api.Unixfs().Get(ctx, resolvedPath)
if err != nil {
Expand All @@ -39,6 +40,7 @@ func (i *gatewayHandler) serveUnixFS(ctx context.Context, w http.ResponseWriter,
internalWebError(w, fmt.Errorf("unsupported UnixFS type"))
return
}

logger.Debugw("serving unixfs directory", "path", contentPath)
i.serveDirectory(ctx, w, r, resolvedPath, contentPath, dir, begin, logger)
}
66 changes: 39 additions & 27 deletions core/corehttp/gateway_handler_unixfs_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/ipfs/go-ipfs/tracing"
path "github.com/ipfs/go-path"
"github.com/ipfs/go-path/resolver"
options "github.com/ipfs/interface-go-ipfs-core/options"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
Expand Down Expand Up @@ -102,36 +103,46 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
return
}

// Optimization 1:
// List children without fetching their root blocks (fast, but no size info)
results, err := i.api.Unixfs().Ls(ctx, resolvedPath, options.Unixfs.ResolveChildren(false))
if err != nil {
internalWebError(w, err)
return
}

// storage for directory listing
var dirListing []directoryItem
dirit := dir.Entries()
for dirit.Next() {
size := "?"
if s, err := dirit.Node().Size(); err == nil {
// Size may not be defined/supported. Continue anyways.
size = humanize.Bytes(uint64(s))
}
dirListing := make([]directoryItem, 0, len(results))

resolved, err := i.api.ResolvePath(ctx, ipath.Join(resolvedPath, dirit.Name()))
if err != nil {
for link := range results {
if link.Err != nil {
	// Report the error carried by this directory entry. The outer err is
	// always nil at this point because the Ls call above already returned
	// successfully, so passing it would hide the real failure.
	internalWebError(w, link.Err)
	return
}
hash := resolved.Cid().String()

// See comment above where originalUrlPath is declared.
hash := link.Cid.String()
di := directoryItem{
Size: size,
Name: dirit.Name(),
Path: gopath.Join(originalUrlPath, dirit.Name()),
Size: "", // no size because we did not fetch child nodes
Name: link.Name,
Path: gopath.Join(originalUrlPath, link.Name),
Hash: hash,
ShortHash: shortHash(hash),
}
dirListing = append(dirListing, di)
}
if dirit.Err() != nil {
internalWebError(w, dirit.Err())
return

// Optimization 2: fetch sizes only for dirs below FastDirIndexThreshold
if len(dirListing) < i.config.FastDirIndexThreshold {
dirit := dir.Entries()
linkNo := 0
for dirit.Next() {
size := "?"
if s, err := dirit.Node().Size(); err == nil {
// Size may not be defined/supported. Continue anyways.
size = humanize.Bytes(uint64(s))
}
dirListing[linkNo].Size = size
linkNo++
}
}

// construct the correct back link
Expand Down Expand Up @@ -180,14 +191,15 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit

// See comment above where originalUrlPath is declared.
tplData := listingTemplateData{
GatewayURL: gwURL,
DNSLink: dnslink,
Listing: dirListing,
Size: size,
Path: contentPath.String(),
Breadcrumbs: breadcrumbs(contentPath.String(), dnslink),
BackLink: backLink,
Hash: hash,
GatewayURL: gwURL,
DNSLink: dnslink,
Listing: dirListing,
Size: size,
Path: contentPath.String(),
Breadcrumbs: breadcrumbs(contentPath.String(), dnslink),
BackLink: backLink,
Hash: hash,
FastDirIndexThreshold: i.config.FastDirIndexThreshold,
}

logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash)
Expand Down
17 changes: 9 additions & 8 deletions core/corehttp/gateway_indexPage.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ import (

// structs for directory listing
// listingTemplateData is the data handed to the directory listing template
// when the gateway renders a UnixFS directory as HTML.
type listingTemplateData struct {
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
// FastDirIndexThreshold carries the gateway's configured threshold into
// the template; it is filled from the handler's config in serveDirectory.
FastDirIndexThreshold int
}

type directoryItem struct {
Expand Down
15 changes: 15 additions & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ config file at runtime.
- [`Gateway.NoDNSLink`](#gatewaynodnslink)
- [`Gateway.HTTPHeaders`](#gatewayhttpheaders)
- [`Gateway.RootRedirect`](#gatewayrootredirect)
- [`Gateway.FastDirIndexThreshold`](#gatewayfastdirindexthreshold)
- [`Gateway.Writable`](#gatewaywritable)
- [`Gateway.PathPrefixes`](#gatewaypathprefixes)
- [`Gateway.PublicGateways`](#gatewaypublicgateways)
Expand Down Expand Up @@ -646,6 +647,20 @@ Default: `""`

Type: `string` (url)

### `Gateway.FastDirIndexThreshold`

The maximum number of items in a directory before the Gateway switches
to a shallow, faster listing which only requires the root node. Directories
with this many items or more are listed without per-item sizes.

This allows for fast listings of big directories, without the linear slowdown caused
by reading size metadata from child nodes.

Setting to 0 will enable fast listings for all directories.

Default: `100`

Type: `optionalInteger`

### `Gateway.Writable`

A boolean to configure whether the gateway is writeable or not.
Expand Down
9 changes: 9 additions & 0 deletions test/sharness/lib/test-lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,15 @@ test_should_contain() {
fi
}

# test_should_not_contain PATTERN FILE
#
# Succeeds when FILE is readable and no line in it matches the grep pattern
# PATTERN. Returns 1 with a diagnostic when a match is found, or when FILE is
# missing or unreadable: without that check grep's "error" exit status would be
# mistaken for "no match" and the assertion would pass vacuously.
test_should_not_contain() {
  test "$#" = 2 || error "bug in the test script: not 2 parameters to test_should_not_contain"
  if ! test -r "$2"
  then
    echo "'$2' is missing or unreadable"
    return 1
  fi
  # `--` stops option parsing so a pattern starting with `-` is still a pattern.
  if grep -q -- "$1" "$2"
  then
    echo "'$2' contains undesired value '$1'"
    return 1
  fi
}

test_str_contains() {
find=$1
shift
Expand Down
26 changes: 25 additions & 1 deletion test/sharness/t0115-gateway-dir-listing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ test_expect_success "Add the test directory" '
echo "I am a txt file in confusing /ipfs dir" > rootDir/ipfs/file.txt &&
echo "I am a txt file in confusing /ipns dir" > rootDir/ipns/file.txt &&
DIR_CID=$(ipfs add -Qr --cid-version 1 rootDir) &&
FILE_CID=$(ipfs files stat /ipfs/$DIR_CID/ą/ę/file-źł.txt | head -1)
FILE_CID=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Hash) &&
FILE_SIZE=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Size)
echo "$FILE_CID / $FILE_SIZE"
'

## ============================================================================
Expand Down Expand Up @@ -135,6 +137,28 @@ test_expect_success "dnslink gw: hash column should be a CID link to cid.ipfs.io
test_should_contain "<a class=\"ipfs-hash\" translate=\"no\" href=\"https://cid.ipfs.io/#$FILE_CID\" target=\"_blank\" rel=\"noreferrer noopener\">" list_response
'

## ============================================================================
## Test dir listing of a big directory
## ============================================================================

# Fetch the listing of the nested test directory and check that the row for
# the test file links to its CID and shows its byte size.
# NOTE(review): presumably Gateway.FastDirIndexThreshold is still at its
# default at this point, so child sizes are resolved — confirm against the
# earlier part of this script.
test_expect_success "dir listing should resolve child sizes if under Gateway.FastDirIndexThreshold" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response &&
test_should_contain "/ipfs/${FILE_CID}?filename" list_response &&
test_should_contain ">${FILE_SIZE} B</td>" list_response
'

# Force the fast (shallow) directory index for all responses by setting the
# threshold to 0.
ipfs config --json Gateway.FastDirIndexThreshold 0
# Restart the daemon so the new config value is applied.
test_kill_ipfs_daemon
test_launch_ipfs_daemon

# Same request as the size-resolving test, but run after the threshold was
# set to 0: the file's CID link must still be present while its byte size
# must no longer appear in the listing.
test_expect_success "dir listing should not resolve child sizes beyond Gateway.FastDirIndexThreshold" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response &&
test_should_contain "/ipfs/${FILE_CID}?filename" list_response &&
test_should_not_contain ">${FILE_SIZE} B</td>" list_response
'

## ============================================================================
## End of tests, cleanup
## ============================================================================
Expand Down

0 comments on commit 25cc85f

Please sign in to comment.