Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move ARC data buffers out of vmalloc #2129

Closed
wants to merge 39 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
a395d00
Add compatibility layer for {kmap,kunmap}_atomic
tuxoko Feb 11, 2015
f0178a9
Introduce ABD: linear/scatter dual typed buffer for ARC
tuxoko Feb 11, 2015
a6dc3cf
Modify/Add incremental checksum function for abd_iterate_rfunc
tuxoko Feb 11, 2015
fb74875
Use abd_t in arc.h, ddt.h, dmu.h and zio.h
tuxoko Feb 11, 2015
9803e31
Convert zio_checksum to ABD version
tuxoko Feb 11, 2015
d3fc93e
Handle abd_t in arc.c, bpobj.c and bptree.c
tuxoko Feb 11, 2015
fa0beb6
Handle abd_t in dbuf.c, ddt.c, dmu*.c
tuxoko Feb 11, 2015
b84b661
Handle abd_t in dnode*.c, dsl_*.c
tuxoko Feb 11, 2015
a1d4875
Handle abd_t in sa_impl.h, sa.c, space_map.c, spa.c, spa_history.c an…
tuxoko Feb 11, 2015
ae7e6a1
Handle abd_t in zap*.c, zfs_fuid.c, zfs_sa.c and zfs_vnops.c
tuxoko Feb 11, 2015
c075cd4
Handle abd_t in zio.c
tuxoko Feb 11, 2015
384635d
Handle abd_t in vdev*.c sans vdev_raidz.c
tuxoko Feb 11, 2015
72c82d9
Handle abd_t in vdev_raidz.c
tuxoko Feb 11, 2015
15d4559
Handle ABD in ztest and zdb
tuxoko Feb 11, 2015
b5e9911
Enable ABD
tuxoko Feb 11, 2015
81cd22f
Disable DEBUG_ABD to prevent build error
tuxoko Apr 16, 2015
878c59f
Fix some ABD related coding errors, style and comment
tuxoko Apr 28, 2015
baa2c34
Add non-highmem scatter ABD
tuxoko Apr 28, 2015
e3a3d24
Allow ARC to handle scatter metadata
tuxoko Apr 28, 2015
ae76721
Add ot_scatter to dmu_ot to determine the abd type
tuxoko Apr 29, 2015
eaf3fa2
Enable scatter ABD for indirect blocks
tuxoko Apr 29, 2015
f5492d3
Enable scatter ABD for DMU_OT_DNODE
tuxoko Apr 29, 2015
4187cf2
Merge branch 'master' into ABD
tuxoko Apr 30, 2015
4151465
Allow abd_alloc_scatter to fallback to linear for small allocation
tuxoko Apr 30, 2015
9b5d0c8
Fix build error "'PAGE_SHIFT' undeclared"
tuxoko Apr 30, 2015
af5ab05
Reduce unnecessary dependency on abd.h
tuxoko May 11, 2015
75b7236
Use PAGESIZE instead of PAGE_SIZE
tuxoko May 11, 2015
19dba64
Remove unnecessary type conversion to void *
tuxoko May 11, 2015
77d8edd
Chained scatterlist
tuxoko May 13, 2015
3febce5
Use inline function instead of non-standard stmt-expr
tuxoko May 14, 2015
5c02305
Misc
tuxoko May 14, 2015
d10b0e3
Fix vdev_disk retry leak
tuxoko May 14, 2015
100f259
Merge branch 'master' into abd
tuxoko May 14, 2015
f659c09
Optimize abd_buf_segment for small scatterlist
tuxoko May 14, 2015
fd90d5a
Use supplied function in kmap-atomic-args.m4
tuxoko May 14, 2015
d057998
Fix possible bug in abd_buf_segment
tuxoko May 17, 2015
1eec47d
Add helper macros to access arcs_list and arcs_lsize
tuxoko May 18, 2015
4fef1fa
Allocate ABD buffer according to BUFC type in _cb functions
tuxoko May 19, 2015
a2929cb
Misc
tuxoko May 25, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

#include <stdio.h>
Expand All @@ -30,6 +31,7 @@
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/abd.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
Expand Down Expand Up @@ -1262,13 +1264,13 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
ASSERT(buf->b_data);

/* recursively visit blocks below this */
cbp = buf->b_data;
for (i = 0; i < epb; i++, cbp++) {
for (i = 0; i < epb; i++) {
zbookmark_phys_t czb;

SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
zb->zb_level - 1,
zb->zb_blkid * epb + i);
cbp = abd_array(buf->b_data, i, blkptr_t);
err = visit_indirect(spa, dnp, cbp, &czb);
if (err)
break;
Expand Down Expand Up @@ -1434,7 +1436,7 @@ dump_bptree(objset_t *os, uint64_t obj, char *name)
return;

VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
bt = db->db_data;
bt = ABD_TO_BUF(db->db_data);
zdb_nicenum(bt->bt_bytes, bytes);
(void) printf("\n %s: %llu datasets, %s\n",
name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
Expand Down Expand Up @@ -1885,7 +1887,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
if (error)
fatal("dmu_bonus_hold(%llu) failed, errno %u",
object, error);
bonus = db->db_data;
bonus = ABD_TO_BUF(db->db_data);
bsize = db->db_size;
dn = DB_DNODE((dmu_buf_impl_t *)db);
}
Expand Down Expand Up @@ -2108,7 +2110,7 @@ dump_config(spa_t *spa)
spa->spa_config_object, FTAG, &db);

if (error == 0) {
nvsize = *(uint64_t *)db->db_data;
nvsize = *(uint64_t *)ABD_TO_BUF(db->db_data);
dmu_buf_rele(db, FTAG);

(void) printf("\nMOS Configuration:\n");
Expand Down Expand Up @@ -2431,7 +2433,7 @@ zdb_blkptr_done(zio_t *zio)
zdb_cb_t *zcb = zio->io_private;
zbookmark_phys_t *zb = &zio->io_bookmark;

zio_data_buf_free(zio->io_data, zio->io_size);
abd_free(zio->io_data, zio->io_size);

mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
Expand Down Expand Up @@ -2494,7 +2496,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (!BP_IS_EMBEDDED(bp) &&
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
abd_t *data = abd_alloc_linear(size);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;

/* If it's an intent log block, failure is expected. */
Expand Down Expand Up @@ -3285,6 +3287,7 @@ zdb_read_block(char *thing, spa_t *spa)
zio_t *zio;
vdev_t *vd;
void *pbuf, *lbuf, *buf;
abd_t *pbuf_abd;
char *s, *p, *dup, *vdev, *flagstr;
int i, error;

Expand Down Expand Up @@ -3356,6 +3359,7 @@ zdb_read_block(char *thing, spa_t *spa)
lsize = size;

pbuf = umem_alloc_aligned(SPA_MAXBLOCKSIZE, 512, UMEM_NOFAIL);
pbuf_abd = abd_get_from_buf(pbuf, SPA_MAXBLOCKSIZE);
lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);

BP_ZERO(bp);
Expand Down Expand Up @@ -3383,15 +3387,15 @@ zdb_read_block(char *thing, spa_t *spa)
/*
* Treat this as a normal block read.
*/
zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL,
zio_nowait(zio_read(zio, spa, bp, pbuf_abd, psize, NULL, NULL,
ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
} else {
/*
* Treat this as a vdev child I/O.
*/
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize,
ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf_abd,
psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
Expand Down Expand Up @@ -3465,6 +3469,7 @@ zdb_read_block(char *thing, spa_t *spa)
zdb_dump_block(thing, buf, size, flags);

out:
abd_put(pbuf_abd);
umem_free(pbuf, SPA_MAXBLOCKSIZE);
umem_free(lbuf, SPA_MAXBLOCKSIZE);
free(dup);
Expand Down
7 changes: 6 additions & 1 deletion cmd/zdb/zdb_il.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

/*
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

/*
Expand All @@ -36,6 +37,7 @@
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/abd.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/stat.h>
Expand Down Expand Up @@ -125,6 +127,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
blkptr_t *bp = &lr->lr_blkptr;
zbookmark_phys_t zb;
char buf[SPA_MAXBLOCKSIZE];
abd_t *abd;
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int error;

Expand Down Expand Up @@ -158,9 +161,11 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));

abd = abd_get_from_buf(buf, BP_GET_LSIZE(bp));
error = zio_wait(zio_read(NULL, zilog->zl_spa,
bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
bp, abd, BP_GET_LSIZE(bp), NULL, NULL,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
abd_put(abd);
if (error)
return;
data = buf;
Expand Down
37 changes: 24 additions & 13 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

/*
Expand Down Expand Up @@ -86,6 +87,7 @@
*/

#include <sys/zfs_context.h>
#include <sys/abd.h>
#include <sys/spa.h>
#include <sys/dmu.h>
#include <sys/txg.h>
Expand Down Expand Up @@ -1310,26 +1312,31 @@ ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
}

static void
ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
ztest_pattern_set(abd_t *abd, uint64_t size, uint64_t value)
{
void *buf = abd_borrow_buf(abd, size);
uint64_t *ip = buf;
uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);

while (ip < ip_end)
*ip++ = value;

abd_return_buf_copy(abd, buf, size);
}

#ifndef NDEBUG
static boolean_t
ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
ztest_pattern_match(abd_t *abd, uint64_t size, uint64_t value)
{
void *buf = abd_borrow_buf_copy(abd, size);
uint64_t *ip = buf;
uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
uint64_t diff = 0;

while (ip < ip_end)
diff |= (value - *ip++);

abd_return_buf(abd, buf, size);
return (diff == 0);
}
#endif
Expand Down Expand Up @@ -1369,7 +1376,8 @@ ztest_bt_bonus(dmu_buf_t *db)
dmu_object_info_from_db(db, &doi);
ASSERT3U(doi.doi_bonus_size, <=, db->db_size);
ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt));
bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt));
bt = (void *)((char *)ABD_TO_BUF(db->db_data) + doi.doi_bonus_size -
sizeof (*bt));

return (bt);
}
Expand Down Expand Up @@ -1731,7 +1739,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
if (abuf == NULL) {
dmu_write(os, lr->lr_foid, offset, length, data, tx);
} else {
bcopy(data, abuf->b_data, length);
abd_copy_from_buf(abuf->b_data, data, length);
dmu_assign_arcbuf(db, offset, abuf, tx);
}

Expand Down Expand Up @@ -4126,16 +4134,19 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
dmu_buf_t *dbt;
if (i != 5) {
bcopy((caddr_t)bigbuf + (off - bigoff),
bigbuf_arcbufs[j]->b_data, chunksize);
abd_copy_from_buf(bigbuf_arcbufs[j]->b_data,
(caddr_t)bigbuf + (off - bigoff),
chunksize);
} else {
bcopy((caddr_t)bigbuf + (off - bigoff),
abd_copy_from_buf(
bigbuf_arcbufs[2 * j]->b_data,
(caddr_t)bigbuf + (off - bigoff),
chunksize / 2);
bcopy((caddr_t)bigbuf + (off - bigoff) +
chunksize / 2,

abd_copy_from_buf(
bigbuf_arcbufs[2 * j + 1]->b_data,
chunksize / 2);
(caddr_t)bigbuf + (off - bigoff) +
chunksize / 2, chunksize / 2);
}

if (i == 1) {
Expand Down Expand Up @@ -5189,7 +5200,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
enum zio_checksum checksum = spa_dedup_checksum(spa);
dmu_buf_t *db;
dmu_tx_t *tx;
void *buf;
abd_t *buf;
blkptr_t blk;
int copies = 2 * ZIO_DEDUPDITTO_MIN;
int i;
Expand Down Expand Up @@ -5270,14 +5281,14 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
* Damage the block. Dedup-ditto will save us when we read it later.
*/
psize = BP_GET_PSIZE(&blk);
buf = zio_buf_alloc(psize);
buf = abd_alloc_linear(psize);
ztest_pattern_set(buf, psize, ~pattern);

(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));

zio_buf_free(buf, psize);
abd_free(buf, psize);

(void) rw_unlock(&ztest_name_lock);
umem_free(od, sizeof (ztest_od_t));
Expand Down
20 changes: 20 additions & 0 deletions config/kernel-kmap-atomic-args.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
dnl #
dnl # 2.6.37 API change
dnl # kmap_atomic changed from assigning a hard-coded named slot to using
dnl # push/pop based dynamic allocation.
dnl #
dnl # This check test-compiles a call to kmap_atomic() with a single
dnl # (struct page *) argument. If that compiles, HAVE_1ARG_KMAP_ATOMIC is
dnl # defined to 1; otherwise nothing is defined and "no" is reported.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS], [
AC_MSG_CHECKING([whether kmap_atomic wants 1 args])
ZFS_LINUX_TRY_COMPILE([
#include <linux/pagemap.h>
],[
struct page page;
kmap_atomic(&page);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_1ARG_KMAP_ATOMIC, 1,
[kmap_atomic wants 1 args])
],[
AC_MSG_RESULT(no)
])
])
1 change: 1 addition & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_5ARG_SGET
ZFS_AC_KERNEL_LSEEK_EXECUTE
ZFS_AC_KERNEL_VFS_ITERATE
ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS

AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
Expand Down
3 changes: 2 additions & 1 deletion include/linux/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ KERNEL_H = \
$(top_srcdir)/include/linux/xattr_compat.h \
$(top_srcdir)/include/linux/vfs_compat.h \
$(top_srcdir)/include/linux/blkdev_compat.h \
$(top_srcdir)/include/linux/utsname_compat.h
$(top_srcdir)/include/linux/utsname_compat.h \
$(top_srcdir)/include/linux/kmap_compat.h

USER_H =

Expand Down
40 changes: 40 additions & 0 deletions include/linux/kmap_compat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/

#ifndef _ZFS_KMAP_H
#define _ZFS_KMAP_H

#include <linux/highmem.h>

/*
 * Compatibility wrappers around kmap_atomic()/kunmap_atomic().
 *
 * Callers always use the two-argument form: zfs_kmap_atomic(page, km_type)
 * and zfs_kunmap_atomic(addr, km_type).
 *
 * When HAVE_1ARG_KMAP_ATOMIC is defined, the km_type argument is dropped
 * by the preprocessor (so it is never evaluated) and the one-argument
 * kernel interface is called. Otherwise both arguments are passed through
 * to the kernel functions unchanged.
 */
#ifdef HAVE_1ARG_KMAP_ATOMIC
/* 2.6.37 API change */
#define zfs_kmap_atomic(page, km_type) kmap_atomic(page)
#define zfs_kunmap_atomic(addr, km_type) kunmap_atomic(addr)
#else
/* Older interface: the caller-supplied km_type is forwarded as-is. */
#define zfs_kmap_atomic(page, km_type) kmap_atomic(page, km_type)
#define zfs_kunmap_atomic(addr, km_type) kunmap_atomic(addr, km_type)
#endif

#endif /* _ZFS_KMAP_H */
1 change: 1 addition & 0 deletions include/sys/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ SUBDIRS = fm fs
COMMON_H = \
$(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/arc_impl.h \
$(top_srcdir)/include/sys/abd.h \
$(top_srcdir)/include/sys/avl.h \
$(top_srcdir)/include/sys/avl_impl.h \
$(top_srcdir)/include/sys/blkptr.h \
Expand Down
Loading