diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/data.py b/src/data.py deleted file mode 100644 index 8592edfc0c6..00000000000 --- a/src/data.py +++ /dev/null @@ -1,195 +0,0 @@ -import os -import netCDF4 as nc4 - -from scipy.io import netcdf -from cStringIO import StringIO -from collections import OrderedDict - -class Attributes(dict): - pass - -class Variable(object): - """ - A netcdf-like variable consisting of dimensions, data and attributes - which describe a single variable. A single variable object is not - fully described outside the context of its parent Dataset. - """ - def __init__(self, dims, data, attributes): - self.dimensions = dims - self.data = data - self.attributes = attributes - - def __getattribute__(self, key): - """ - We want Variable to inherit some of the attributes of - the underlaying data. - """ - if key in ['dtype', 'shape', 'size']: - return getattr(self.data, key) - else: - return object.__getattribute__(self, key) - -class Dataset(object): - """ - A netcdf-like data object consisting of dimensions, variables and - attributes which together form a self describing data set. - """ - - def _load_scipy(self, scipy_nc, *args, **kwdargs): - """ - Interprets a netcdf file-like object using scipy.io.netcdf. - The contents of the netcdf object are loaded into memory. - """ - try: - nc = netcdf.netcdf_file(scipy_nc, mode='r', *args, **kwdargs) - except: - scipy_nc = StringIO(scipy_nc) - scipy_nc.seek(0) - nc = netcdf.netcdf_file(scipy_nc, mode='r', *args, **kwdargs) - - def from_scipy_variable(sci_var): - return Variable(dims = sci_var.dimensions, - data = sci_var.data, - attributes = sci_var._attributes) - - object.__setattr__(self, 'attributes', Attributes()) - self.attributes.update(nc._attributes) - - object.__setattr__(self, 'dimensions', OrderedDict()) - dimensions = OrderedDict((k, len(d)) - for k, d in nc.dimensions.iteritems()) - self.dimensions.update(dimensions) - - object.__setattr__(self, 'variables', OrderedDict()) - OrderedDict = OrderedDict((vn, from_scipy_variable(v)) - for vn, v in nc.variables.iteritems()) - self.variables.update() - - def _load_netcdf4(self, netcdf_path, *args, **kwdargs): - """ - Interprets the contents of netcdf_path using the netCDF4 - package. 
- """ - nc = nc4.Dataset(netcdf_path, *args, **kwdargs) - - def from_netcdf4_variable(nc4_var): - attributes = dict((k, nc4_var.getncattr(k)) for k in nc4_var.ncattrs()) - return Variable(dims = tuple(nc4_var.dimensions), - data = nc4_var[:], - attributes = attributes) - - object.__setattr__(self, 'attributes', Attributes()) - self.attributes.update(dict((k.encode(), nc.getncattr(k)) for k in nc.ncattrs())) - - object.__setattr__(self, 'dimensions', OrderedDict()) - dimensions = OrderedDict((k.encode(), len(d)) for k, d in nc.dimensions.iteritems()) - self.dimensions.update(dimensions) - - object.__setattr__(self, 'variables', OrderedDict()) - self.variables.update(dict((vn.encode(), from_netcdf4_variable(v)) - for vn, v in nc.variables.iteritems())) - - def __init__(self, nc, *args, **kwdargs): - if isinstance(nc, basestring) and not nc.startswith('CDF'): - """ - If the initialization nc is a string and it doesn't - appear to be the contents of a netcdf file we load - it using the netCDF4 package - """ - self._load_netcdf4(nc, *args, **kwdargs) - else: - """ - If nc is a file-like object we read it using - the scipy.io.netcdf package - """ - self._load_scipy(nc) - - def __setattr__(self, attr, value): - """"__setattr__ is overloaded to prevent operations that could - cause loss of data consistency. If you really intend to update - dir(self), use the self.__dict__.update method or the - super(type(a), self).__setattr__ method to bypass.""" - raise AttributeError("__setattr__ is disabled") - - def dump(self, filepath, *args, **kwdargs): - """ - Dump the contents to a location on disk using - the netCDF4 package - """ - nc = nc4.Dataset(filepath, mode='w', *args, **kwdargs) - for d, l in self.dimensions.iteritems(): - nc.createDimension(d, size=l) - for vn, v in self.variables.iteritems(): - nc.createVariable(vn, v.dtype, v.dimensions) - nc.variables[vn][:] = v.data[:] - for k, a in v.attributes.iteritems(): - try: - nc.variables[vn].setncattr(k, a) - except: - import pdb; pdb.set_trace() - - nc.setncatts(self.attributes) - return nc - - def dumps(self): - """ - Serialize the contents to a string. The serialization - creates an in memory netcdf version 3 string using - the scipy.io.netcdf package. 
- """ - fobj = StringIO() - nc = netcdf.netcdf_file(fobj, mode='w') - for d, l in self.dimensions.iteritems(): - nc.createDimension(d, l) - - for vn, v in self.variables.iteritems(): - - nc.createVariable(vn, v.dtype, v.dimensions) - nc.variables[vn][:] = v.data[:] - for k, a in v.attributes.iteritems(): - setattr(nc.variables[vn], k, a) - for k, a in self.attributes.iteritems(): - setattr(nc, k, a) - nc.flush() - return fobj.getvalue() - -if __name__ == "__main__": - """ - For now this regression test assumes you've downloaded a sample - netCDF file and placed it in scidata/test/ - - Heres one way to get going: - - mkdir test - cd test - wget http://www.unidata.ucar.edu/software/netcdf/examples/ECMWF_ERA-40_subset.nc - - """ - - base_dir = os.path.dirname(__file__) - test_dir = os.path.join(base_dir, '..', 'test', ) - write_test_path = os.path.join(test_dir, 'test_output.nc') - ecmwf_netcdf = os.path.join(test_dir, 'ECMWF_ERA-40_subset.nc') - - import time - st = time.time() - nc = Dataset(ecmwf_netcdf) - print "Seconds to read from filepath : ", time.time() - st - - st = time.time() - nc.dump(write_test_path) - print "Seconds to write : ", time.time() - st - - st = time.time() - nc_string = nc.dumps() - print "Seconds to serialize : ", time.time() - st - - st = time.time() - nc = Dataset(nc_string) - print "Seconds to deserialize : ", time.time() - st - - st = time.time() - with open(ecmwf_netcdf, 'r') as f: - nc = Dataset(f) - print "Seconds to read from fobj : ", time.time() - st - diff --git a/src/polyglot/__init__.py b/src/polyglot/__init__.py new file mode 100644 index 00000000000..b14e6b51e88 --- /dev/null +++ b/src/polyglot/__init__.py @@ -0,0 +1 @@ +from data import Dataset, Variable \ No newline at end of file diff --git a/src/polyglot/conventions.py b/src/polyglot/conventions.py new file mode 100644 index 00000000000..351d3cc9297 --- /dev/null +++ b/src/polyglot/conventions.py @@ -0,0 +1,129 @@ +import numpy as np +import unicodedata + +NULL = '\x00' +NC_BYTE = '\x00\x00\x00\x01' +NC_CHAR = '\x00\x00\x00\x02' +NC_SHORT = '\x00\x00\x00\x03' +# netCDF-3 only supports 32-bit integers +NC_INT = '\x00\x00\x00\x04' +NC_FLOAT = '\x00\x00\x00\x05' +NC_DOUBLE = '\x00\x00\x00\x06' + +# Map between netCDF type and numpy dtype and vice versa. Due to a bug +# in the __hash__() method of numpy dtype objects (fixed in development +# release of numpy), we need to explicitly match byteorder for dict +# lookups to succeed. Here we normalize to native byte order. +# +# NC_CHAR is a special case because netCDF represents strings as +# character arrays. When NC_CHAR is encountered as the type of an +# attribute value, this TYPEMAP is not consulted and the data is read +# as a string. However, when NC_CHAR is encountered as the type of a +# variable, then the data is read is a numpy array of 1-char elements +# (equivalently, length-1 raw "strings"). There is no support for numpy +# arrays of multi-character strings. +TYPEMAP = { + # we could use np.dtype's as key/values except __hash__ comparison of + # numpy.dtype is broken in older versions of numpy. 
If you must compare
+    # and cannot upgrade, use __eq__. This bug is known to be fixed in
+    # numpy version 1.3.
+    NC_BYTE: 'int8',
+    NC_CHAR: '|S1',
+    NC_SHORT: 'int16',
+    NC_INT: 'int32',
+    NC_FLOAT: 'float32',
+    NC_DOUBLE: 'float64',
+    }
+for k in TYPEMAP.keys():
+    TYPEMAP[TYPEMAP[k]] = k
+
+# Special characters that are permitted in netCDF names except in the
+# 0th position of the string
+_specialchars = '_.@+- !"#$%&\()*,:;<=>?[]^`{|}~'
+
+# The following are reserved names in CDL and may not be used as names of
+# variables, dimensions or attributes
+_reserved_names = set([
+    'byte',
+    'char',
+    'short',
+    'ushort',
+    'int',
+    'uint',
+    'int64',
+    'uint64',
+    'float',
+    'real',
+    'double',
+    'bool',
+    'string',
+    ])
+
+def coerce_type(arr):
+    """Coerce a numeric data type to a type that is compatible with
+    netCDF-3
+
+    netCDF-3 cannot handle 64-bit integers, but on most platforms
+    Python integers are int64. To work around this discrepancy, this
+    helper function coerces int64 arrays to int32. An exception is
+    raised if this coercion is not safe.
+
+    netCDF-3 cannot handle booleans, but booleans can be trivially
+    (albeit wastefully) represented as bytes. To work around this
+    discrepancy, this helper function coerces bool arrays to int8.
+    """
+    # Comparing the char attributes of numpy dtypes is inelegant, but this is
+    # the fastest test of equivalence that is invariant to endianness
+    if arr.dtype.char == 'l':  # np.dtype('int64')
+        cast_arr = arr.astype(
+            np.dtype('int32').newbyteorder(arr.dtype.byteorder))
+        if not (cast_arr == arr).all():
+            raise ValueError("array contains integer values that " +
+                             "are not representable as 32-bit signed integers")
+        return cast_arr
+    elif arr.dtype.char == '?':  # np.dtype('bool')
+        cast_arr = arr.astype(
+            np.dtype('int8').newbyteorder(arr.dtype.byteorder))
+        return cast_arr
+    else:
+        return arr
+
+def _isalnumMUTF8(c):
+    """Return True if the given UTF-8 encoded character is alphanumeric
+    or multibyte.
+
+    Input is not checked!
+    """
+    return (c.isalnum() or (len(c.encode('utf-8')) > 1))
+
+def is_valid_name(s):
+    """Test whether an object can be validly converted to a netCDF
+    dimension, variable or attribute name
+
+    Earlier versions of the netCDF C-library reference implementation
+    enforced a more restricted set of characters in creating new names,
+    but permitted reading names containing arbitrary bytes. This
+    specification extends the permitted characters in names to include
+    multi-byte UTF-8 encoded Unicode and additional printing characters
+    from the US-ASCII alphabet. The first character of a name must be
+    alphanumeric, a multi-byte UTF-8 character, or '_' (reserved for
+    special names with meaning to implementations, such as the
+    "_FillValue" attribute). Subsequent characters may also include
+    printing special characters, except for '/' which is not allowed in
+    names. Names that have trailing space characters are also not
+    permitted.
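+
+        A few illustrative checks (these follow directly from the rules
+        above):
+
+        >>> is_valid_name('temperature')
+        True
+        >>> is_valid_name('bad/name')   # '/' is never allowed
+        False
+        >>> is_valid_name('trailing ')  # trailing spaces are rejected
+        False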
+ """ + if not isinstance(s, basestring): + return False + if not isinstance(s, unicode): + s = unicode(s, 'utf-8') + num_bytes = len(s.encode('utf-8')) + return ((unicodedata.normalize('NFC', s) == s) and + (s not in _reserved_names) and + (num_bytes >= 0) and + ('/' not in s) and + (s[-1] != ' ') and + (_isalnumMUTF8(s[0]) or (s[0] == '_')) and + all((_isalnumMUTF8(c) or c in _specialchars for c in s)) + ) \ No newline at end of file diff --git a/src/polyglot/data.py b/src/polyglot/data.py new file mode 100644 index 00000000000..fe99d81728b --- /dev/null +++ b/src/polyglot/data.py @@ -0,0 +1,1233 @@ +# TODO Use various backend data stores. pytable, ncdf4, scipy.io, iris, memory + +import os +import copy +import itertools +import unicodedata + +import numpy as np +import netCDF4 as nc4 + +from operator import or_ +from scipy.io import netcdf +from cStringIO import StringIO +from collections import OrderedDict + +import conventions + +date2num = nc4.date2num +num2date = nc4.num2date + +def _prettyprint(x, numchars): + """Given an object x, call x.__str__() and format the returned + string so that it is numchars long, padding with trailing spaces or + truncating with ellipses as necessary""" + s = str(x).rstrip(conventions.NULL) + if len(s) > numchars: + return s[:(numchars - 3)] + '...' + else: + return s + +class AttributesDict(OrderedDict): + """A subclass of OrderedDict whose __setitem__ method automatically + checks and converts values to be valid netCDF attributes + """ + def __init__(self, *args, **kwds): + OrderedDict.__init__(self, *args, **kwds) + + def __setitem__(self, key, value): + if not conventions.is_valid_name(key): + raise ValueError("Not a valid attribute name") + # Strings get special handling because netCDF treats them as + # character arrays. Everything else gets coerced to a numpy + # vector. netCDF treats scalars as 1-element vectors. Arrays of + # non-numeric type are not allowed. + if isinstance(value, basestring): + # netcdf attributes should be unicode + value = unicode(value) + else: + try: + value = conventions.coerce_type(np.atleast_1d(np.asarray(value))) + except: + raise ValueError("Not a valid value for a netCDF attribute") + if value.ndim > 1: + raise ValueError("netCDF attributes must be vectors " + + "(1-dimensional)") + value = conventions.coerce_type(value) + if str(value.dtype) not in conventions.TYPEMAP: + # A plain string attribute is okay, but an array of + # string objects is not okay! + raise ValueError("Can not convert to a valid netCDF type") + OrderedDict.__setitem__(self, key, value) + + def copy(self): + """The copy method of the superclass simply calls the constructor, + which in turn calls the update method, which in turns calls + __setitem__. This subclass implementation bypasses the expensive + validation in __setitem__ for a substantial speedup.""" + obj = self.__class__() + for (attr, value) in self.iteritems(): + OrderedDict.__setitem__(obj, attr, copy.copy(value)) + return obj + + def __deepcopy__(self, memo=None): + """ + Returns a deep copy of the current object. 
+
+        memo does nothing but is required for compatibility with
+        copy.deepcopy
+        """
+        return self.copy()
+
+    def update(self, *other, **kwargs):
+        """Set multiple attributes with a mapping object or an iterable of
+        key/value pairs"""
+        # Capture arguments in an OrderedDict
+        args_dict = OrderedDict(*other, **kwargs)
+        try:
+            # Attempt __setitem__
+            for (attr, value) in args_dict.iteritems():
+                self.__setitem__(attr, value)
+        except:
+            # Clean up so that we don't end up in a partial state
+            for (attr, value) in args_dict.iteritems():
+                if self.__contains__(attr):
+                    self.__delitem__(attr)
+            # Re-raise the original exception
+            raise
+
+    def __eq__(self, other):
+        if not set(self.keys()) == set(other.keys()):
+            return False
+        for (key, value) in self.iteritems():
+            if value.__class__ != other[key].__class__:
+                return False
+            if isinstance(value, basestring):
+                if value != other[key]:
+                    return False
+            else:
+                if value.tostring() != other[key].tostring():
+                    return False
+        return True
+
+class Variable(object):
+    """
+    A netcdf-like variable consisting of dimensions, data and attributes
+    which describe a single variable. A single variable object is not
+    fully described outside the context of its parent Dataset.
+    """
+    def __init__(self, dims, data, attributes=None):
+        object.__setattr__(self, 'dimensions', dims)
+        object.__setattr__(self, 'data', data)
+        if attributes is None:
+            attributes = {}
+        object.__setattr__(self, 'attributes', AttributesDict(attributes))
+
+    def _allocate(self):
+        return self.__class__(dims=(), data=0)
+
+    def __getattribute__(self, key):
+        """
+        Here we give some of the attributes of self.data preference over
+        attributes in the object itself.
+        """
+        if key in ['dtype', 'shape', 'size', 'ndim', 'nbytes',
+                   'flat', '__iter__', 'view']:
+            return getattr(self.data, key)
+        else:
+            return object.__getattribute__(self, key)
+
+    def __setattr__(self, attr, value):
+        """__setattr__ is overloaded to prevent operations that could
+        cause loss of data consistency. If you really intend to update
+        dir(self), use the self.__dict__.update method or the
+        super(type(a), self).__setattr__ method to bypass."""
+        raise AttributeError, "Object is tamper-proof"
+
+    def __delattr__(self, attr):
+        raise AttributeError, "Object is tamper-proof"
+
+    def __getitem__(self, index):
+        """__getitem__ is overloaded to access the underlying numpy data"""
+        return self.data[index]
+
+    def __setitem__(self, index, data):
+        """__setitem__ is overloaded to access the underlying numpy data"""
+        self.data[index] = data
+
+    def __hash__(self):
+        """__hash__ is overloaded to guarantee that two variables with the
+        same attributes and data values have the same hash (the converse
+        is not true)"""
+        return hash((self.dimensions,
+                     frozenset((k, v.tostring()) if isinstance(v, np.ndarray)
+                               else (k, v)
+                               for (k, v) in self.attributes.items()),
+                     self.data.tostring()))
+
+    def __len__(self):
+        """__len__ is overloaded to access the underlying numpy data"""
+        return self.data.__len__()
+
+    def __copy__(self):
+        """
+        Returns a shallow copy of the current object.
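+
+        The copy shares its data buffer with the original (illustrative):
+
+        >>> v = Variable(('x',), np.arange(3))
+        >>> copy.copy(v).data is v.data
+        True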
+ """ + # Create the simplest possible dummy object and then overwrite it + obj = self._allocate() + object.__setattr__(obj, 'dimensions', self.dimensions) + object.__setattr__(obj, 'data', self.data) + object.__setattr__(obj, 'attributes', self.attributes) + return obj + + def __deepcopy__(self, memo=None): + """ + Returns a deep copy of the current object. + + memo does nothing but is required for compatability with copy.deepcopy + """ + # Create the simplest possible dummy object and then overwrite it + obj = self._allocate() + # tuples are immutable + object.__setattr__(obj, 'dimensions', self.dimensions) + object.__setattr__(obj, 'data', self.data[:].copy()) + object.__setattr__(obj, 'attributes', self.attributes.copy()) + return obj + + def __eq__(self, other): + if self.dimensions != other.dimensions or \ + (self.data.tostring() != other.data.tostring()): + return False + if not self.attributes == other.attributes: + return False + return True + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + """Create a ncdump-like summary of the object""" + summary = ["dimensions:"] + # prints dims that look like: + # dimension = length + dim_print = lambda d, l : "\t%s : %s" % (_prettyprint(d, 30), + _prettyprint(l, 10)) + # add each dimension to the summary + summary.extend([dim_print(d, l) for d, l in zip(self.dimensions, self.shape)]) + summary.append("type : %s" % (_prettyprint(var.dtype, 8))) + summary.append("\nattributes:") + # attribute:value + summary.extend(["\t%s:%s" % (_prettyprint(att, 30), + _prettyprint(val, 30)) + for att, val in self.attributes.iteritems()]) + # create the actual summary + return '\n'.join(summary) + + def views(self, slicers): + """Return a new Variable object whose contents are a view of the object + sliced along a specified dimension. + + Parameters + ---------- + slicers : {dim: slice, ...} + A dictionary mapping from dim to slice, dim represents + the dimension to slice along slice represents the range of the + values to extract. + + Returns + ------- + obj : Variable object + The returned object has the same attributes and dimensions + as the original. Data contents are taken along the + specified dimension. Care must be taken since modifying (most) + values in the returned object will result in modification to the + parent object. + + See Also + -------- + view + take + """ + slices = [slice(None)] * self.data.ndim + for i, dim in enumerate(self.dimensions): + if dim in slicers: + slices[i] = slicers[dim] + # Shallow copy + obj = copy.copy(self) + object.__setattr__(obj, 'data', self.data[slices]) + return obj + + def view(self, s, dim): + """Return a new Variable object whose contents are a view of the object + sliced along a specified dimension. + + Parameters + ---------- + s : slice + The slice representing the range of the values to extract. + dim : string + The dimension to slice along. If multiple dimensions equal + dim (e.g. a correlation matrix), then the slicing is done + only along the first matching dimension. + + Returns + ------- + obj : Variable object + The returned object has the same attributes and dimensions + as the original. Data contents are taken along the + specified dimension. Care must be taken since modifying (most) + values in the returned object will result in modification to the + parent object. 
+
+        See Also
+        --------
+        take
+        """
+        return self.views({dim: s})
+
+    def take(self, indices, dim):
+        """Return a new Variable object whose contents are sliced from
+        the current object along a specified dimension
+
+        Parameters
+        ----------
+        indices : array_like
+            The indices of the values to extract. indices must be
+            compatible with the ndarray.take() method.
+        dim : string
+            The dimension to slice along. If multiple dimensions equal
+            dim (e.g. a correlation matrix), then the slicing is done
+            only along the first matching dimension.
+
+        Returns
+        -------
+        obj : Variable object
+            The returned object has the same attributes and dimensions
+            as the original. Data contents are taken along the
+            specified dimension.
+
+        See Also
+        --------
+        numpy.take
+        """
+        indices = np.asarray(indices)
+        if indices.ndim != 1:
+            raise ValueError('indices should have a single dimension')
+        # When dim appears repeatedly in self.dimensions, using the index()
+        # method gives us only the first one, which is the desired behavior
+        axis = list(self.dimensions).index(dim)
+        # Deep copy
+        obj = copy.deepcopy(self)
+        # In case data is lazy we need to slice out all the data before taking.
+        object.__setattr__(obj, 'data', self.data[:].take(indices, axis=axis))
+        return obj
+
+class Dataset(object):
+    """
+    A netcdf-like data object consisting of dimensions, variables and
+    attributes which together form a self describing data set.
+    """
+    def _allocate(self):
+        return self.__class__()
+
+    def _load_scipy(self, scipy_nc, *args, **kwdargs):
+        """
+        Interprets a netcdf file-like object using scipy.io.netcdf.
+        The contents of the netcdf object are loaded into memory.
+        """
+        try:
+            nc = netcdf.netcdf_file(scipy_nc, mode='r', *args, **kwdargs)
+        except:
+            scipy_nc = StringIO(scipy_nc)
+            scipy_nc.seek(0)
+            nc = netcdf.netcdf_file(scipy_nc, mode='r', *args, **kwdargs)
+
+        def from_scipy_variable(sci_var):
+            return Variable(dims=sci_var.dimensions,
+                            data=sci_var.data,
+                            attributes=sci_var._attributes)
+
+        object.__setattr__(self, 'attributes', AttributesDict())
+        self.attributes.update(nc._attributes)
+
+        object.__setattr__(self, 'dimensions', OrderedDict())
+        dimensions = OrderedDict((k, len(d))
+                                 for k, d in nc.dimensions.iteritems())
+        self.dimensions.update(dimensions)
+
+        object.__setattr__(self, 'variables', OrderedDict())
+        variables = OrderedDict((vn, from_scipy_variable(v))
+                                for vn, v in nc.variables.iteritems())
+        self.variables.update(variables)
+
+    def _load_netcdf4(self, netcdf_path, *args, **kwdargs):
+        """
+        Interprets the contents of netcdf_path using the netCDF4
+        package.
+        """
+        nc = nc4.Dataset(netcdf_path, *args, **kwdargs)
+
+        object.__setattr__(self, 'attributes', AttributesDict())
+        self.attributes.update(dict((k.encode(), nc.getncattr(k))
+                                    for k in nc.ncattrs()))
+
+        object.__setattr__(self, 'dimensions', OrderedDict())
+        dimensions = OrderedDict((k.encode(), len(d))
+                                 for k, d in nc.dimensions.iteritems())
+        self.dimensions.update(dimensions)
+
+        def from_netcdf4_variable(nc4_var):
+            attributes = dict((k, nc4_var.getncattr(k))
+                              for k in nc4_var.ncattrs())
+            return Variable(dims=tuple(nc4_var.dimensions),
+                            # TODO : this variable copy is lazy and
+                            # might cause issues in the future.
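+                            # nc4_var is kept as a lazy reference: values
+                            # are only read from disk when the data is
+                            # sliced (e.g. via v.data[:]).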
+ data = nc4_var, + attributes = attributes) + + object.__setattr__(self, 'variables', OrderedDict()) + self.variables.update(dict((vn.encode(), from_netcdf4_variable(v)) + for vn, v in nc.variables.iteritems())) + + def __init__(self, nc = None, *args, **kwdargs): + if isinstance(nc, basestring) and not nc.startswith('CDF'): + """ + If the initialization nc is a string and it doesn't + appear to be the contents of a netcdf file we load + it using the netCDF4 package + """ + self._load_netcdf4(nc, *args, **kwdargs) + elif nc is None: + object.__setattr__(self, 'attributes', AttributesDict()) + object.__setattr__(self, 'dimensions', OrderedDict()) + object.__setattr__(self, 'variables', OrderedDict()) + else: + """ + If nc is a file-like object we read it using + the scipy.io.netcdf package + """ + self._load_scipy(nc) + + + def __setattr__(self, attr, value): + """"__setattr__ is overloaded to prevent operations that could + cause loss of data consistency. If you really intend to update + dir(self), use the self.__dict__.update method or the + super(type(a), self).__setattr__ method to bypass.""" + raise AttributeError("__setattr__ is disabled") + + def __contains__(self, key): + """ + The 'in' operator will return true or false depending on + whether 'key' is a varibale in the data object or not. + """ + return key in self.variables + + def __eq__(self, other): + if not isinstance(other, Dataset): + return False + if dict(self.dimensions) != dict(other.dimensions): + return False + if not dict(self.variables) == dict(other.variables): + return False + if not self.attributes == other.attributes: + return False + return True + + def __ne__(self, other): + return not self.__eq__(other) + + @property + def coordinates(self): + # A coordinate variable is a 1-dimensional variable with the + # same name as its dimension + return OrderedDict([(dim, length) + for (dim, length) in self.dimensions.iteritems() + if (dim in self.variables) and + (self.variables[dim].data.ndim == 1) and + (self.variables[dim].dimensions == (dim,)) + ]) + + @property + def noncoordinates(self): + # A coordinate variable is a 1-dimensional variable with the + # same name as its dimension + return OrderedDict([(name, v) + for (name, v) in self.variables.iteritems() + if name not in self.coordinates]) + + def dump(self, filepath, *args, **kwdargs): + """ + Dump the contents to a location on disk using + the netCDF4 package + """ + nc = nc4.Dataset(filepath, mode='w', *args, **kwdargs) + for d, l in self.dimensions.iteritems(): + nc.createDimension(d, size=l) + for vn, v in self.variables.iteritems(): + nc.createVariable(vn, v.dtype, v.dimensions) + nc.variables[vn][:] = v.data[:] + for k, a in v.attributes.iteritems(): + try: + nc.variables[vn].setncattr(k, a) + except: + import pdb; pdb.set_trace() + + nc.setncatts(self.attributes) + return nc + + def dumps(self): + """ + Serialize the contents to a string. The serialization + creates an in memory netcdf version 3 string using + the scipy.io.netcdf package. + """ + # TODO : this (may) effectively double the amount of + # data held in memory. It'd be nice to stream the + # serialized string. 
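+        # An illustrative round trip (assuming a Dataset `ds`):
+        #     s = ds.dumps()     # in-memory NetCDF-3 bytes
+        #     Dataset(s) == ds   # parsing them back restores the dataset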
+ fobj = StringIO() + nc = netcdf.netcdf_file(fobj, mode='w') + # copy the dimensions + for d, l in self.dimensions.iteritems(): + nc.createDimension(d, l) + # copy the variables + for vn, v in self.variables.iteritems(): + nc.createVariable(vn, v.dtype, v.dimensions) + nc.variables[vn][:] = v.data[:] + for k, a in v.attributes.iteritems(): + setattr(nc.variables[vn], k, a) + # copy the attributes + for k, a in self.attributes.iteritems(): + setattr(nc, k, a) + # flush to the StringIO object + nc.flush() + return fobj.getvalue() + + def __str__(self): + """Create a ncdump-like summary of the object""" + summary = ["dimensions:"] + # prints dims that look like: + # dimension = length + dim_print = lambda d, l : "\t%s = %s" % (_prettyprint(d, 30), + _prettyprint(l, 10)) + # add each dimension to the summary + summary.extend([dim_print(d, l) for d, l in self.dimensions.iteritems()]) + + # Print variables + summary.append("\nvariables:") + for vname, var in self.variables.iteritems(): + # this looks like: + # dtype name(dim1, dim2) + summary.append("\t%s %s(%s)" % (_prettyprint(var.dtype, 8), + _prettyprint(vname, 20), + _prettyprint(', '.join(var.dimensions), 45))) + # attribute:value + summary.extend(["\t\t%s:%s" % (_prettyprint(att, 30), + _prettyprint(val, 30)) + for att, val in var.attributes.iteritems()]) + + summary.append("\nattributes:") + # attribute:value + summary.extend(["\t%s:%s" % (_prettyprint(att, 30), + _prettyprint(val, 30)) + for att, val in self.attributes.iteritems()]) + # create the actual summary + return '\n'.join(summary) + + def __getitem__(self, key): + if key in self.variables: + return self.variables[key] + else: + raise ValueError("%s is not a variable" % key) + + def unchecked_set_dimensions(self, dimensions): + object.__setattr__(self, 'dimensions', dimensions) + + def unchecked_create_dimension(self, name, length): + self.dimensions[name] = length + + def create_dimension(self, name, length): + """Adds a dimension with name dim and length to the object + + Parameters + ---------- + name : string + The name of the new dimension. An exception will be raised if the + object already has a dimension with this name. + length : int or None + The length of the new dimension; must be non-negative and + representable as a signed 32-bit integer. + """ + if name in self.dimensions: + raise ValueError("Dimension named '%s' already exists" % name) + if length is None: + # unlimted dimensions aren't allowed yet + raise ValueError(" unlimited dimensions are not allowed") + else: + if not isinstance(length, int): + raise TypeError("Dimension length must be int") + assert length >= 0 + self.unchecked_create_dimension(name, length) + + def unchecked_set_attributes(self, attributes): + object.__setattr__(self, 'attributes', attributes) + + def unchecked_add_variable(self, name, variable): + self.variables[name] = variable + return self.variables[name] + + def unchecked_create_variable(self, name, dims, data, attributes): + v = Variable(dims=dims, data=data, attributes=attributes) + return self.unchecked_add_variable(name, v) + + def create_variable(self, name, dims, data, attributes=None): + """Create a new variable. + + Parameters + ---------- + name : string + The name of the new variable. An exception will be raised + if the object already has a variable with this name. name + must satisfy netCDF-3 naming rules. If name equals the name + of a dimension, then the new variable is treated as a + coordinate variable and must be 1-dimensional. 
+ dims : tuple + The dimensions of the new variable. Elements must be dimensions of + the object. + data : numpy.ndarray or None, optional + Data to populate the new variable. If None (default), then + an empty numpy array is allocated with the appropriate + shape and dtype. If data contains int64 integers, it will + be coerced to int32 (for the sake of netCDF compatibility), + and an exception will be raised if this coercion is not + safe. + attributes : dict_like or None, optional + Attributes to assign to the new variable. Attribute names + must be unique and must satisfy netCDF-3 naming rules. If + None (default), an empty attribute dictionary is + initialized. + + Returns + ------- + var : Variable + Reference to the newly created variable. + """ + if name in self.variables: + raise ValueError("Variable named '%s' already exists" % (name)) + + if not all([(d in self.dimensions) for d in dims]): + bad = [d for d in dims if (d not in self.dimensions)] + raise ValueError("the following dim(s) are not valid " + + "dimensions of this object: %s" % bad) + + data = np.asarray(data) + for axis, cdim in enumerate(dims): + if (not (data.shape[axis] == self.dimensions[cdim])): + raise ValueError("data shape does not match dimensions: " + + "axis %d (dims '%s'). " % + (axis, cdim) + + "expected length %d, got %d." % + (self.dimensions[cdim], + data.shape[axis])) + if (name in self.dimensions) and (data.ndim != 1): + raise ValueError("A coordinate variable must be defined with " + + "1-dimensional data") + return self.unchecked_create_variable(name, dims, data, attributes) + + def unchecked_create_coordinate(self, name, data, attributes): + self.unchecked_create_dimension(name, data.size) + return self.unchecked_create_variable(name, (name,), data, attributes) + + def create_coordinate(self, name, data, attributes=None): + """Create a new dimension and a corresponding coordinate variable. + + This method combines the create_dimension and create_variable methods + for the common case when the variable is a 1-dimensional coordinate + variable with the same name as the dimension. + + Parameters + ---------- + name : string + The name of the new dimension and variable. An exception + will be raised if the object already has a dimension or + variable with this name. name must satisfy netCDF-3 naming + rules. + data : array_like + The coordinate values along this dimension; must be + 1-dimensional. The dtype of data is the dtype of the new + coordinate variable, and the size of data is the length of + the new dimension. If data contains int64 integers, it will + be coerced to int32 (for the sake of netCDF compatibility), + and an exception will be raised if this coercion is not + safe. + attributes : dict_like or None, optional + Attributes to assign to the new variable. Attribute names + must be unique and must satisfy netCDF-3 naming rules. If + None (default), an empty attribute dictionary is + initialized. + + Returns + ------- + var : Variable + Reference to the newly created coordinate variable. + """ + data = np.asarray(data) + if data.ndim != 1: + raise ValueError("data must be 1-dimensional (vector)") + # We need to be cleanly roll back the effects of + # create_dimension if create_variable fails, otherwise we will + # end up in a partial state. 
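+        # Illustrative usage (assumed names): after
+        #     ds.create_coordinate('time', data=np.arange(10))
+        # ds has a dimension 'time' of length 10 and a matching
+        # 1-dimensional variable under the same name.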
+        return self.unchecked_create_coordinate(name, data, attributes)
+
+    def add_variable(self, name, variable):
+        """A convenience function for adding a variable from one object to
+        another.
+
+        Parameters
+        ----------
+        name : string
+            The name under which the variable will be added.
+        variable : core.Variable
+            The variable to be added. If the desired action is to add a
+            copy of the variable be sure to do so before passing it to
+            this function.
+        """
+        # any error checking should be taken care of by create_variable
+        return self.create_variable(name,
+                                    dims=variable.dimensions,
+                                    data=variable.data,
+                                    attributes=variable.attributes)
+
+    def delete_variable(self, name):
+        """Delete a variable. Dimensions on which the variable is
+        defined are not affected.
+
+        Parameters
+        ----------
+        name : string
+            The name of the variable to be deleted. An exception will
+            be raised if there is no variable with this name.
+        """
+        if name not in self.variables:
+            raise ValueError("Object does not have a variable '%s'" %
+                             (str(name)))
+        else:
+            super(type(self.variables), self.variables).__delitem__(name)
+
+    def views(self, slicers):
+        """Return a new object whose contents are a view of a slice from the
+        current object along a specified dimension
+
+        Parameters
+        ----------
+        slicers : {dim: slice, ...}
+            A dictionary mapping from a dimension to a slice object.
+
+        Returns
+        -------
+        obj : Data object
+            The returned object has the same attributes, dimensions,
+            variable names and variable attributes as the original.
+            Variables that are not defined along the specified
+            dimensions are viewed in their entirety. Variables that are
+            defined along the specified dimension have their data
+            contents taken along the specified dimension.
+
+            Care must be taken since modifying (most) values in the returned
+            object will result in modification to the parent object.
+
+        See Also
+        --------
+        view
+        numpy.take
+        Variable.take
+        """
+        if not all([isinstance(sl, slice) for sl in slicers.values()]):
+            raise ValueError("view expects a dict whose values are slice "
+                             "objects")
+        if not all([k in self.dimensions for k in slicers.keys()]):
+            invalid = [k for k in slicers.keys() if not k in self.dimensions]
+            raise KeyError("dimensions %s don't exist" %
+                           ', '.join(map(str, invalid)))
+        # Create a new object
+        obj = self._allocate()
+        # Create views onto the variables and infer the new dimension length
+        new_dims = dict(self.dimensions.iteritems())
+        for (name, var) in self.variables.iteritems():
+            var_slicers = dict((k, v) for k, v in slicers.iteritems()
+                               if k in var.dimensions)
+            if len(var_slicers):
+                obj.unchecked_add_variable(name, var.views(var_slicers))
+                new_dims.update(dict(zip(obj[name].dimensions,
+                                         obj[name].shape)))
+            else:
+                obj.unchecked_add_variable(name, var)
+        # Hard write the dimensions, skipping validation
+        obj.unchecked_set_dimensions(new_dims)
+        # Reference to the attributes, this intentionally does not copy.
+        obj.unchecked_set_attributes(self.attributes)
+        return obj
+
+    def view(self, s, dim=None):
+        """Return a new object whose contents are a view of a slice from the
+        current object along a specified dimension
+
+        Parameters
+        ----------
+        s : slice
+            The slice representing the range of the values to extract.
+        dim : string, optional
+            The dimension to slice along. If multiple dimensions of a
+            variable equal dim (e.g. a correlation matrix), then that
+            variable is sliced along all matching dimensions.
+            Without this behavior the resulting data object would have
+            inconsistent dimensions.
+
+        Returns
+        -------
+        obj : Data object
+            The returned object has the same attributes, dimensions,
+            variable names and variable attributes as the original.
+            Variables that are not defined along the specified
+            dimensions are viewed in their entirety. Variables that are
+            defined along the specified dimension have their data
+            contents taken along the specified dimension.
+
+            Care must be taken since modifying (most) values in the returned
+            object will result in modification to the parent object.
+
+        See Also
+        --------
+        views
+        numpy.take
+        Variable.take
+        """
+        obj = self.views({dim: s})
+        if obj.dimensions[dim] == 0:
+            raise IndexError("view results in a dimension of length zero")
+        return obj
+
+    def take(self, indices, dim=None):
+        """Return a new object whose contents are taken from the
+        current object along a specified dimension
+
+        Parameters
+        ----------
+        indices : array_like
+            The indices of the values to extract. indices must be
+            compatible with the ndarray.take() method.
+        dim : string, optional
+            The dimension to slice along. If multiple dimensions of a
+            variable equal dim (e.g. a correlation matrix), then that
+            variable is sliced only along its first matching dimension.
+            dim is currently required: a ValueError is raised if it is
+            None, since unlimited (record) dimensions are not yet
+            supported.
+
+        Returns
+        -------
+        obj : Data object
+            The returned object has the same attributes, dimensions,
+            variable names and variable attributes as the original.
+            Variables that are not defined along the specified
+            dimensions are copied in their entirety. Variables that are
+            defined along the specified dimension have their data
+            contents taken along the specified dimension.
+
+        See Also
+        --------
+        numpy.take
+        Variable.take
+        """
+        if dim is None:
+            raise ValueError("dim cannot be None")
+        # Create a new object
+        obj = self._allocate()
+        # Create fancy-indexed variables and infer the new dimension length
+        new_length = self.dimensions[dim]
+        for (name, var) in self.variables.iteritems():
+            if dim in var.dimensions:
+                obj.unchecked_add_variable(name, var.take(indices, dim))
+                new_length = obj.variables[name].data.shape[
+                    list(var.dimensions).index(dim)]
+            else:
+                obj.unchecked_add_variable(name, copy.deepcopy(var))
+        # Hard write the dimensions, skipping validation
+        for d, l in self.dimensions.iteritems():
+            if d == dim:
+                l = new_length
+            obj.unchecked_create_dimension(d, l)
+        if obj.dimensions[dim] == 0:
+            raise IndexError(
+                "take would result in a dimension of length zero")
+        # Copy attributes onto the new object (not back onto self)
+        obj.unchecked_set_attributes(self.attributes.copy())
+        return obj
+
+    def renamed(self, name_dict):
+        """
+        Returns a copy of the current object with variables and dimensions
+        renamed according to the mapping in name_dict
+
+        Parameters
+        ----------
+        name_dict : dict-like
+            Dictionary-like object whose keys are current variable
+            names and whose values are new names.
+        """
+        for name in self.dimensions.iterkeys():
+            if name in self.variables and not name in self.coordinates:
+                raise ValueError(("Renaming assumes that only coordinates "
+                                  "have both a dimension and variable under "
+                                  "the same name. In this case it appears "
+                                  "%s has a dim and var but is not a "
+                                  "coordinate") % name)
+
+        new_names = dict((name, name)
+                         for name, _ in self.dimensions.iteritems())
+        new_names.update(dict((name, name)
+                              for name, _ in self.variables.iteritems()))
+
+        for k, v in name_dict.iteritems():
+            if not k in new_names:
+                raise ValueError("Cannot rename %s because it does not "
+                                 "exist" % k)
+        new_names.update(name_dict)
+
+        obj = self._allocate()
+        # create each dimension under its new name
+        for (name, length) in self.dimensions.iteritems():
+            obj.create_dimension(new_names[name], length)
+        # copy each variable under its new name, renaming its dimensions
+        for (name, v) in self.variables.iteritems():
+            obj.create_variable(new_names[name],
+                                tuple([new_names[d] for d in v.dimensions]),
+                                data=v.data.copy(),
+                                attributes=v.attributes.copy())
+        # copy the root attributes onto the new object
+        obj.unchecked_set_attributes(self.attributes.copy())
+        return obj
+
+    def update(self, other):
+        """
+        An update method (similar to dict.update) for data objects whereby
+        each dimension, variable and attribute from 'other' is updated in
+        the current object. Note however that because Data object variables
+        are write protected an exception will be raised if an attempt is
+        made to overwrite a variable with different data.
+        """
+        # if a dimension is a new one it gets added, if the dimension already
+        # exists we confirm that they are identical (or raise an exception)
+        for (name, length) in other.dimensions.iteritems():
+            # record dimensions are not defined on all data objects, so
+            # fall back to None if the attribute is missing
+            if (name == getattr(other, 'record_dimension', None) and
+                    name != getattr(self, 'record_dimension', None)):
+                raise ValueError(
+                    ("record dimensions do not match: "
+                     "self: %s, other: %s") %
+                    (getattr(self, 'record_dimension', None),
+                     getattr(other, 'record_dimension', None)))
+            if not name in self.dimensions:
+                self.create_dimension(name, length)
+            else:
+                cur_length = self.dimensions[name]
+                if cur_length is None:
+                    # a length of None marks the record dimension, whose
+                    # effective length is the size of its coordinate data
+                    cur_length = self[name].data.size
+                if length != cur_length:
+                    raise ValueError("inconsistent dimension lengths for " +
+                                     "dim: %s , %s != %s" %
+                                     (name, length, cur_length))
+        # a variable is only added if it doesn't currently exist, otherwise
+        # an exception is raised
+        for (name, v) in other.variables.iteritems():
+            if not name in self.variables:
+                self.create_variable(name,
+                                     v.dimensions,
+                                     data=v.data.copy(),
+                                     attributes=v.attributes.copy())
+            else:
+                if self[name].dimensions != other[name].dimensions:
+                    raise ValueError("%s has different dimensions cur:%s "
+                                     "new:%s" % (name,
+                                                 str(self[name].dimensions),
+                                                 str(other[name].dimensions)))
+                if (self.variables[name].data.tostring() !=
+                        other.variables[name].data.tostring()):
+                    raise ValueError("%s has different data" % name)
+                self[name].attributes.update(other[name].attributes)
+        # update the root attributes
+        self.attributes.update(other.attributes)
+
+    def select(self, var):
+        """Return a new object that contains the specified variables,
+        along with the dimensions on which those variables are defined
+        and corresponding coordinate variables.
+
+        Parameters
+        ----------
+        var : bounded sequence of strings
+            The variables to include in the returned object.
+
+        Returns
+        -------
+        obj : Data object
+            The returned object has the same attributes as the
+            original. A dimension is included if at least one of the
+            specified variables is defined along that dimension.
+ Coordinate variables (1-dimensional variables with the same + name as a dimension) that correspond to an included + dimension are also included. All other variables are + dropped. + """ + if isinstance(var, basestring): + var = [var] + if not (hasattr(var, '__iter__') and hasattr(var, '__len__')): + raise TypeError("var must be a bounded sequence") + if not all((v in self.variables for v in var)): + raise KeyError( + "One or more of the specified variables does not exist") + # Create a new Data instance + obj = self._allocate() + # Copy relevant dimensions + dim = reduce(or_, [set(self.variables[v].dimensions) for v in var]) + # Create dimensions in the same order as they appear in self.dimension + for d in dim: + obj.unchecked_create_dimension(d, self.dimensions[d]) + # Also include any coordinate variables defined on the relevant + # dimensions + for (name, v) in self.variables.iteritems(): + if (name in var) or ((name in dim) and (v.dimensions == (name,))): + obj.unchecked_create_variable(name, + dims=v.dimensions, + data=v.data.copy(), + attributes=v.attributes.copy()) + obj.unchecked_set_attributes(self.attributes.copy()) + return obj + + def iterator(self, dim=None, views=False): + """Iterator along a data dimension + + Return an iterator yielding (coordinate, data_object) pairs + that are singleton along the specified dimension + + Parameters + ---------- + dim : string, optional + The dimension along which you want to iterate. If None + (default), then the iterator operates along the record + dimension; if there is no record dimension, an exception + will be raised. + views : boolean, optional + If True, the iterator will give views of the data along + the dimension, otherwise copies. + + Returns + ------- + it : iterator + The returned iterator yields pairs of scalar-valued + coordinate variables and data objects. The yielded data + objects contain *copies* onto the underlying numpy arrays of + the original data object. If the data object does not have + a coordinate variable with the same name as the specified + dimension, then the returned coordinate value is None. If + multiple dimensions of a variable equal dim (e.g. a + correlation matrix), then that variable is iterated along + the first matching dimension. 
+
+        Examples
+        --------
+        >>> d = Dataset()
+        >>> d.create_coordinate(name='x', data=np.arange(10))
+        >>> d.create_coordinate(name='y', data=np.arange(20))
+        >>> print d
+
+        dimensions:
+          name | length
+         ===========================
+          x    | 10
+          y    | 20
+
+        variables:
+          name | dtype | shape | dimensions
+         =====================================================================
+          x    | int32 | (10,) | ('x',)
+          y    | int32 | (20,) | ('y',)
+
+        attributes:
+            None
+
+        >>> i = d.iterator(dim='x')
+        >>> (a, b) = i.next()
+        >>> print a
+
+        dtype:
+          int32
+
+        dimensions:
+          name | length
+         ===========================
+          x    | 1
+
+        attributes:
+            None
+
+        >>> print b
+
+        dimensions:
+          name | length
+         ===========================
+          x    | 1
+          y    | 20
+
+        variables:
+          name | dtype | shape | dimensions
+         =====================================================================
+          x    | int32 | (1,)  | ('x',)
+          y    | int32 | (20,) | ('y',)
+
+        attributes:
+            None
+
+        """
+        # Determine the size of the dim we're about to iterate over
+        n = self.dimensions[dim]
+        # Iterate over the object
+        if dim in self.coordinates:
+            coord = self.variables[dim]
+            if views:
+                for i in xrange(n):
+                    s = slice(i, i + 1)
+                    yield (coord.view(s, dim=dim),
+                           self.view(s, dim=dim))
+            else:
+                for i in xrange(n):
+                    indices = np.array([i])
+                    yield (coord.take(indices, dim=dim),
+                           self.take(indices, dim=dim))
+        else:
+            if views:
+                for i in xrange(n):
+                    yield (None, self.view(slice(i, i + 1), dim=dim))
+            else:
+                for i in xrange(n):
+                    yield (None, self.take(np.array([i]), dim=dim))
+
+    def iterarray(self, var, dim=None):
+        """Iterator along a data dimension returning the corresponding
+        slices of the underlying data of a variable.
+
+        Return an iterator yielding (scalar, ndarray) pairs that are
+        singleton along the specified dimension. While iterator is more
+        general, this method has less overhead and in turn should be
+        considerably faster.
+
+        Parameters
+        ----------
+        var : string
+            The variable over which you want to iterate.
+        dim : string, optional
+            The dimension along which you want to iterate. dim is
+            currently required: an exception is raised if it is None.
+
+        Returns
+        -------
+        it : iterator
+            The returned iterator yields pairs of scalar-valued
+            and ndarray objects. The yielded data objects contain *views*
+            onto the underlying numpy arrays of the original data object.
+
+        Examples
+        --------
+        >>> d = Dataset()
+        >>> d.create_coordinate(name='t', data=np.arange(5))
+        >>> d.create_dimension(name='h', length=3)
+        >>> d.create_variable(name='x', dims=('t', 'h'),\
+        ...                   data=np.random.random((5, 3,)))
+        >>> print d['x'].data
+        [[ 0.33499995  0.47606901  0.41334325]
+         [ 0.20229308  0.73693437  0.97451746]
+         [ 0.40020704  0.29763575  0.85588908]
+         [ 0.44114434  0.79233816  0.59115313]
+         [ 0.18583972  0.55084889  0.95478946]]
+        >>> i = d.iterarray(var='x', dim='t')
+        >>> (a, b) = i.next()
+        >>> print a
+        0
+        >>> print b
+        [[ 0.33499995  0.47606901  0.41334325]]
+        """
+        # Get a reference to the underlying ndarray for the desired variable
+        # and build a list of slice objects
+        data = self.variables[var].data
+        axis = list(self.variables[var].dimensions).index(dim)
+        slicer = [slice(None)] * data.ndim
+        # Determine the size of the dim we're about to iterate over
+        n = self.dimensions[dim]
+        # Iterate over dim returning views of the variable.
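+        # e.g. for a ('t', 'h') variable iterated along 't', slicer is
+        # [slice(i, i + 1), slice(None)] on step i, so each yielded
+        # array is a view of shape (1, len(h)).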
+ if dim in self.coordinates: + coord = self.variables[dim].data + for i in xrange(n): + slicer[axis] = slice(i, i + 1) + yield (coord[i], data[slicer]) + else: + for i in xrange(n): + slicer[axis] = slice(i, i + 1) + yield (None, data[slicer]) + + def squeeze(self, dimension): + """ + Squeezes a dimension of length 1, returning a copy of the object + with that dimension removed. + """ + if self.dimensions[dimension] != 1: + raise ValueError(("Can only squeeze along dimensions with" + + "length one, %s has length %d") % + (dimension, self.dimensions[dimension])) + # Create a new Data instance + obj = self._allocate() + # Copy dimensions + for (name, length) in self.dimensions.iteritems(): + if not name == dimension: + obj.create_dimension(name, length) + # Copy variables + for (name, var) in self.variables.iteritems(): + if not name == dimension: + dims = list(var.dimensions) + data = var.data.copy() + if dimension in dims: + shape = list(var.data.shape) + index = dims.index(dimension) + shape.pop(index) + dims.pop(index) + data = data.reshape(shape) + obj.create_variable(name=name, + dims=tuple(dims), + data=data, + attributes=var.attributes.copy()) + obj.unchecked_set_attributes(self.attributes.copy()) + return obj + +if __name__ == "__main__": + """ + A bunch of regression tests. + """ + base_dir = os.path.dirname(__file__) + test_dir = os.path.join(base_dir, '..', 'test', ) + write_test_path = os.path.join(test_dir, 'test_output.nc') + ecmwf_netcdf = os.path.join(test_dir, 'ECMWF_ERA-40_subset.nc') + + import time + st = time.time() + nc = Dataset(ecmwf_netcdf) + print "Seconds to read from filepath : ", time.time() - st + + st = time.time() + nc.dump(write_test_path) + print "Seconds to write : ", time.time() - st + + st = time.time() + nc_string = nc.dumps() + print "Seconds to serialize : ", time.time() - st + + st = time.time() + nc = Dataset(nc_string) + print "Seconds to deserialize : ", time.time() - st + + st = time.time() + with open(ecmwf_netcdf, 'r') as f: + nc = Dataset(f) + print "Seconds to read from fobj : ", time.time() - st + diff --git a/test/test_data.py b/test/test_data.py new file mode 100644 index 00000000000..81071085f80 --- /dev/null +++ b/test/test_data.py @@ -0,0 +1,357 @@ +import polyglot.data as data + +import unittest +import os.path +import numpy as np +import scipy.interpolate + +from copy import deepcopy +from cStringIO import StringIO + +from polyglot import Dataset, Variable + +_dims = {'dim1':100, 'dim2':50, 'dim3':10} +_vars = {'var1':['dim1', 'dim2'], + 'var2':['dim1', 'dim2'], + 'var3':['dim3', 'dim1'], + } +_testvar = sorted(_vars.keys())[0] +_testdim = sorted(_dims.keys())[0] + +def test_data(): + obj = Dataset() + obj.create_dimension('time', 10) + for d, l in _dims.items(): + obj.create_dimension(d, l) + var = obj.create_variable(name=d, dims=(d,), data=np.arange(l), + attributes={'units':'integers'}) + for v, dims in _vars.items(): + var = obj.create_variable(name=v, dims=tuple(dims), + data=np.random.normal(size=tuple([_dims[d] for d in dims]))) + var.attributes['foo'] = 'variable' + return obj + +class DataTest(unittest.TestCase): + + def test_iterator(self): + data = test_data() + # iterate over the first dim + iterdim = _testdim + for t, sub in data.iterator(dim=iterdim): + ind = int(np.where(data.variables[iterdim].data == t.data)[0]) + # make sure all the slices match + for v in _vars.keys(): + if iterdim in data[v].dimensions: + dim_axis = list(data[v].dimensions).index(iterdim) + expected = data[v].data.take( + [ind], 
axis=dim_axis).reshape(sub[v].data.shape) + np.testing.assert_array_equal(sub[v].data, expected) + self.assertEquals(sub.dimensions[iterdim], 1) + # test that the yielded objects are copies of the original + for (t, sub) in data.iterator(dim=iterdim): + sub[_testvar][:] = -71 + self.assertTrue((data[_testvar].data != -71).all()) + + def test_iterarray(self): + data = test_data() + # iterate over the first dim + iterdim = _testdim + for t, d in data.iterarray(dim=iterdim, var=_testvar): + ind = int(np.where(data.variables[iterdim].data == t)[0]) + # make sure all the slices match + dim_axis = list(data[_testvar].dimensions).index(iterdim) + expected = data[_testvar].data.take([ind], axis=dim_axis) + np.testing.assert_array_equal(d, expected) + # test that the yielded objects are views of the original + for (t, d) in data.iterarray(dim=iterdim, var=_testvar): + d[:] = -71 + self.assertTrue((data[_testvar].data == -71).all()) + + def test_dimension(self): + a = Dataset() + # data objects (currently) do not support record dimensions + self.assertRaises(ValueError, a.create_dimension, 'time', None) + a.create_dimension('time', 10) + a.create_dimension('x', 5) + # prevent duplicate creation + self.assertRaises(ValueError, a.create_dimension, 'time', 0) + # length must be integer + self.assertRaises(TypeError, a.create_dimension, 'foo', 'a') + self.assertRaises(TypeError, a.create_dimension, 'foo', [1,]) + self.assertTrue('foo' not in a.dimensions) + + def test_variable(self): + a = Dataset() + a.create_dimension('time', 10) + a.create_dimension('x', 3) + d = np.random.random((10, 3)) + a.create_variable(name='foo', dims=('time', 'x',), data=d) + self.assertTrue('foo' in a.variables) + self.assertTrue('foo' in a) + a.create_variable(name='bar', dims=('time', 'x',), data=d) + # order of creation is preserved + self.assertTrue(a.variables.keys() == ['foo', 'bar']) + self.assertTrue(all([a['foo'][i] == d[i] + for i in np.ndindex(*d.shape)])) + # prevent duplicate creation + self.assertRaises(ValueError, a.create_variable, + name='foo', dims=('time', 'x',), data=d) + # dimension must be defined + self.assertRaises(ValueError, a.create_variable, + name='qux', dims=('time', 'missing_dim',), data=d) + # try to add variable with dim (10,3) with data that's (3,10) + self.assertRaises(ValueError, a.create_variable, + name='qux', dims=('time', 'x'), data=d.T) + # Variable equality + d = np.random.rand(10, 3) + v1 = Variable(('dim1','dim2'), data=d, + attributes={'att1': 3, 'att2': [1,2,3]}) + v2 = Variable(('dim1','dim2'), data=d, + attributes={'att1': 3, 'att2': [1,2,3]}) + v5 = Variable(('dim1','dim2'), data=d, + attributes={'att1': 3, 'att2': [1,2,3]}) + v3 = Variable(('dim1','dim3'), data=d, + attributes={'att1': 3, 'att2': [1,2,3]}) + v4 = Variable(('dim1','dim2'), data=d, + attributes={'att1': 3, 'att2': [1,2,4]}) + v5 = deepcopy(v1) + v5.data[:] = np.random.rand(10,3) + self.assertEquals(v1, v2) + self.assertFalse(v1 == v3) + self.assertFalse(v1 == v4) + self.assertFalse(v1 == v5) + # Variable hash + self.assertEquals(hash(v1), hash(v2)) + + def test_coordinate(self): + a = Dataset() + vec = np.random.random((10,)) + attributes = {'foo': 'bar'} + a.create_coordinate('x', data=vec, attributes=attributes) + self.assertTrue('x' in a.coordinates) + self.assertTrue(a.coordinates['x'] == a.dimensions['x']) + b = Dataset() + b.create_dimension('x', vec.size) + b.create_variable('x', dims=('x',), data=vec, attributes=attributes) + self.assertTrue((a['x'].data == b['x'].data).all()) + 
self.assertEquals(a.dimensions, b.dimensions)
+        arr = np.random.random((10, 1,))
+        scal = np.array(0)
+        self.assertRaises(ValueError, a.create_coordinate,
+                          name='y', data=arr)
+        self.assertRaises(ValueError, a.create_coordinate,
+                          name='y', data=scal)
+        self.assertTrue('y' not in a.dimensions)
+
+    def test_attributes(self):
+        a = Dataset()
+        a.attributes['foo'] = 'abc'
+        a.attributes['bar'] = 1
+        # numeric scalars are stored as length-1 vectors
+        self.assertTrue(isinstance(a.attributes['bar'], np.ndarray) and
+                        a.attributes['bar'].ndim == 1)
+        # __contains__ method
+        self.assertTrue('foo' in a.attributes)
+        self.assertTrue('bar' in a.attributes)
+        self.assertTrue('baz' not in a.attributes)
+        # user-defined attributes are not object attributes
+        self.assertRaises(AttributeError, object.__getattribute__, a, 'foo')
+        # different ways of setting attributes ought to be equivalent
+        b = Dataset()
+        b.attributes.update(foo='abc')
+        self.assertEquals(a.attributes['foo'], b.attributes['foo'])
+        b = Dataset()
+        b.attributes.update([('foo', 'abc')])
+        self.assertEquals(a.attributes['foo'], b.attributes['foo'])
+        b = Dataset()
+        b.attributes.update({'foo': 'abc'})
+        self.assertEquals(a.attributes['foo'], b.attributes['foo'])
+        # attributes can be overwritten
+        b.attributes['foo'] = 'xyz'
+        self.assertEquals(b.attributes['foo'], 'xyz')
+        # attributes can be deleted
+        del b.attributes['foo']
+        self.assertTrue('foo' not in b.attributes)
+        # attributes can be cleared
+        b.attributes.clear()
+        self.assertTrue(len(b.attributes) == 0)
+        # attributes can be compared
+        a = Dataset()
+        b = Dataset()
+        a.attributes['foo'] = 'bar'
+        b.attributes['foo'] = np.nan
+        self.assertFalse(a == b)
+        a.attributes['foo'] = np.nan
+        self.assertTrue(a == b)
+        # attribute names/values must be netCDF-compatible
+        self.assertRaises(ValueError, b.attributes.__setitem__, '/', 0)
+        self.assertRaises(ValueError, b.attributes.__setitem__, 'foo',
+                          np.zeros((2, 2)))
+        self.assertRaises(ValueError, b.attributes.__setitem__, 'foo', dict())
+
+    def test_view(self):
+        data = test_data()
+        slicedim = _testdim
+        s = slice(None, None, 2)
+        ret = data.view(s=s, dim=slicedim)
+        # Verify that only the specified dimension was altered
+        for d in data.dimensions:
+            if d == slicedim:
+                self.assertEqual(ret.dimensions[d],
+                                 np.arange(data.dimensions[d])[s].size)
+            else:
+                self.assertEqual(data.dimensions[d], ret.dimensions[d])
+        # Verify that the data is what we expect
+        for v in data.variables:
+            self.assertEqual(data[v].dimensions, ret[v].dimensions)
+            self.assertEqual(data[v].attributes, ret[v].attributes)
+            if slicedim in data[v].dimensions:
+                slice_list = [slice(None)] * data[v].data.ndim
+                slice_list[data[v].dimensions.index(slicedim)] = s
+                expected = data[v].data[slice_list]
+            else:
+                expected = data[v].data
+            actual = ret[v].data
+            np.testing.assert_array_equal(expected, actual)
+            # Test that our view accesses the same underlying array
+            actual.fill(np.pi)
+            np.testing.assert_array_equal(expected, actual)
+        self.assertRaises(KeyError, data.view,
+                          s=s, dim='not_a_dim')
+        self.assertRaises(IndexError, data.view,
+                          s=slice(100, 200), dim=slicedim)
+
+    def test_views(self):
+        data = test_data()
+        data.create_variable('var4', ('dim1', 'dim1'),
+                             data=np.empty((data.dimensions['dim1'],
+                                            data.dimensions['dim1']),
+                                           np.float))
+        data['var4'].data[:] = np.random.normal(size=data['var4'].shape)
+        slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 2)}
+        ret = data.views(slicers)
+
+        # Verify that only the specified
+
+    def test_views(self):
+        data = test_data()
+        data.create_variable('var4', ('dim1', 'dim1'),
+                             data=np.empty((data.dimensions['dim1'],
+                                            data.dimensions['dim1']),
+                                           np.float64))
+        data['var4'].data[:] = np.random.normal(size=data['var4'].shape)
+        slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 2)}
+        ret = data.views(slicers)
+        # Verify that only the specified dimensions were altered
+        for d in data.dimensions:
+            if d in slicers:
+                self.assertEqual(ret.dimensions[d],
+                                 np.arange(data.dimensions[d])[slicers[d]].size)
+            else:
+                self.assertEqual(data.dimensions[d], ret.dimensions[d])
+        # Verify that the data is what we expect
+        for v in data.variables:
+            self.assertEqual(data[v].dimensions, ret[v].dimensions)
+            self.assertEqual(data[v].attributes, ret[v].attributes)
+            slice_list = [slice(None)] * data[v].data.ndim
+            for d, s in slicers.iteritems():
+                if d in data[v].dimensions:
+                    # a dimension may occur more than once (e.g. var4)
+                    inds = np.nonzero(np.array(data[v].dimensions) == d)[0]
+                    for ind in inds:
+                        slice_list[ind] = s
+            expected = data[v].data[tuple(slice_list)]
+            actual = ret[v].data
+            np.testing.assert_array_equal(expected, actual)
+            # Test that our view accesses the same underlying array
+            actual.fill(np.pi)
+            np.testing.assert_array_equal(expected, actual)
+        self.assertRaises(KeyError, data.views,
+                          {'not_a_dim': slice(0, 2)})
+
+    def test_take(self):
+        data = test_data()
+        slicedim = _testdim
+        # using a list
+        ret = data.take(indices=range(2, 5), dim=slicedim)
+        self.assertEqual(len(ret[slicedim].data), 3)
+        # using a numpy vector
+        ret = data.take(indices=np.array([2, 3, 4]), dim=slicedim)
+        self.assertEqual(len(ret[slicedim].data), 3)
+        # with a random index
+        indices = np.random.randint(data.dimensions[slicedim], size=10)
+        ret = data.take(indices=indices, dim=slicedim)
+        # Verify that only the specified dimension was altered
+        for d in data.dimensions:
+            if d == slicedim:
+                self.assertEqual(ret.dimensions[d], indices.size)
+            else:
+                self.assertEqual(data.dimensions[d], ret.dimensions[d])
+        # Verify that the data is what we expect
+        for v in data.variables:
+            self.assertEqual(data[v].dimensions, ret[v].dimensions)
+            self.assertEqual(data[v].attributes, ret[v].attributes)
+            if slicedim in data[v].dimensions:
+                expected = data[v].data.take(
+                    indices, axis=data[v].dimensions.index(slicedim))
+            else:
+                expected = data[v].data
+            actual = ret[v].data
+            np.testing.assert_array_equal(expected, actual)
+            # Test that our take is a copy
+            ret[v].data.fill(np.pi)
+            self.assertTrue(not (data[v].data == np.pi).any())
+        self.assertRaises(KeyError, data.take,
+                          indices=indices, dim='not_a_dim')
+        self.assertRaises(IndexError, data.take,
+                          indices=[data.dimensions[slicedim] + 10],
+                          dim=slicedim)
+
+    def test_squeeze(self):
+        data = test_data()
+        singleton = data.take([1], 'dim2')
+        squeezed = singleton.squeeze('dim2')
+        self.assertTrue('dim2' not in squeezed.dimensions)
+        for x in [v for v, d in _vars.iteritems() if 'dim2' in d]:
+            np.testing.assert_array_equal(singleton[x].data.flatten(),
+                                          squeezed[x].data)
+
+    def test_select(self):
+        data = test_data()
+        ret = data.select(_testvar)
+        np.testing.assert_array_equal(data[_testvar].data,
+                                      ret[_testvar].data)
+        self.assertTrue(_vars.keys()[1] not in ret.variables)
+        self.assertRaises(KeyError, data.select, (_testvar, 'not_a_var'))
+
+    def test_copy(self):
+        data = test_data()
+        var = data.variables[_testvar]
+        var.attributes['foo'] = 'hello world'
+        # call __deepcopy__ directly to exercise the copy protocol
+        var_copy = var.__deepcopy__()
+        self.assertEqual(var.data[2, 3], var_copy.data[2, 3])
+        var_copy.data[2, 3] = np.pi
+        self.assertNotEqual(var.data[2, 3], np.pi)
+        self.assertEqual(var_copy.attributes['foo'], var.attributes['foo'])
+        var_copy.attributes['foo'] = 'xyz'
+        self.assertNotEqual(var_copy.attributes['foo'],
+                            var.attributes['foo'])
+        self.assertEqual(var_copy.attributes['foo'], 'xyz')
+        self.assertNotEqual(id(var), id(var_copy))
+        self.assertNotEqual(id(var.data), id(var_copy.data))
+        self.assertNotEqual(id(var.attributes), id(var_copy.attributes))
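+        # Usage sketch (comments only, not executed): unlike view(), both
+        # take() and the copy protocol return fully independent objects:
+        #   clone = deepcopy(data.variables[_testvar])
+        #   clone.data[:] = 0   # leaves data[_testvar] untouched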
+
+    def test_rename(self):
+        data = test_data()
+        newnames = {'var1': 'renamed_var1', 'dim2': 'renamed_dim2'}
+        renamed = data.renamed(newnames)
+
+        # build the expected mapping from new names to the old variables
+        variables = dict((k, v) for k, v in data.variables.iteritems())
+        for k, v in newnames.iteritems():
+            variables[v] = variables.pop(k)
+
+        for k, v in variables.iteritems():
+            self.assertTrue(k in renamed.variables)
+            self.assertEqual(v.attributes, renamed.variables[k].attributes)
+            dims = list(v.dimensions)
+            for name, newname in newnames.iteritems():
+                if name in dims:
+                    dims[dims.index(name)] = newname
+            self.assertEqual(dims, list(renamed.variables[k].dimensions))
+            np.testing.assert_array_equal(v.data, renamed.variables[k].data)
+
+        self.assertTrue('var1' not in renamed.variables)
+        self.assertTrue('var1' not in renamed.dimensions)
+        self.assertTrue('dim2' not in renamed.variables)
+        self.assertTrue('dim2' not in renamed.dimensions)
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file