docs: the rest of the changes I thought was committing earlier

microsoft · Jul 21, 2016 · 249f7a4 · 249f7a4
1 parent 8eeac0d
commit 249f7a4
Showing 1 changed file with 89 additions and 28 deletions.
diff --git a/qcodes/data/format.py b/qcodes/data/format.py
@@ -24,12 +24,13 @@ class Formatter:
       as a separate method because it occasionally gets called independently.
 
     All of these methods accept a ``data_set`` argument, which should be a
-    ``DataSet`` object. Even if
+    ``DataSet`` object. Even if you are loading a new data set from disk, this
+    object should already have attributes:
         io: an IO manager (see qcodes.data.io)
         location: a string, like a file path, that identifies the DataSet and
             tells the IO manager where to store it
-        arrays: a dict of {array_id:DataArray} to read into.
-            - read will create DataArrays that don't yet exist.
+        arrays: a dict of ``{array_id:DataArray}`` to read into.
+            - read will create entries that don't yet exist.
             - write will write ALL DataArrays in the DataSet, using
               last_saved_index and modified_range, as well as whether or not
               it found the specified file, to determine how much to write.
@@ -54,9 +55,20 @@ def write(self, data_set, io_manager, location):
 
     def read(self, data_set):
         """
-        Read the entire DataSet by finding all files matching its location
-        (using io_manager.list) and calling read_one_file from the Formatter
-        subclass. Subclasses may alternatively override this entire method.
+        Read the entire ``DataSet``.
+
+        Find all files matching ``data_set.location`` (using io_manager.list)
+        and call ``read_one_file`` on each. Subclasses may either override
+        this method (if they use only one file or want to do their own
+        searching) or override ``read_one_file`` to use the search and
+        initialization functionality defined here.
+
+        Args:
+            data_set (DataSet): the data to read into. Should already have
+                attributes ``io`` (an io manager), ``location`` (string),
+                and ``arrays`` (dict of ``{array_id: array}``, can be empty
+                or can already have some or all of the arrays present, they
+                expect to be overwritten)
         """
         io_manager = data_set.io
         location = data_set.location
@@ -98,37 +110,74 @@ def write_metadata(self, data_set, io_manager, location, read_first=True):
         raise NotImplementedError
 
     def read_metadata(self, data_set):
-        """Read the metadata from this DataSet from storage."""
+        """
+        Read the metadata from this DataSet from storage.
+
+        Subclasses must override this method.
+
+        Args:
+            data_set (DataSet): the data to read metadata into
+        """
         raise NotImplementedError
 
     def read_one_file(self, data_set, f, ids_read):
         """
-        Formatter subclasses that handle multiple data files may choose to
-        override this method, which handles one file at a time.
-
-        data_set: the DataSet we are reading into
-        f: a file-like object to read from
-        ids_read: a `set` of array_ids that we have already read.
-            when you read an array, check that it's not in this set (except
-            setpoints, which can be in several files with different inner loop)
-            then add it to the set so other files know not to read it again
+        Read data from a single file into a ``DataSet``.
+
+        Formatter subclasses that break a DataSet into multiple data files may
+        choose to override either this method, which handles one file at a
+        time, or ``read`` which finds matching files on its own.
+
+        Args:
+            data_set (DataSet): the data we are reading into.
+
+            f (file-like): a file-like object to read from, as provided by
+                ``io_manager.open``.
+
+            ids_read (set): ``array_id``s that we have already read.
+                When you read an array, check that it's not in this set (except
+                setpoints, which can be in several files with different inner
+                loops) then add it to the set so other files know it should not
+                be read again.
+
+        Raises:
+            ValueError: if a duplicate array_id of measured data is found
         """
         raise NotImplementedError
 
     def match_save_range(self, group, file_exists, only_complete=True):
         """
-        Find the save range that will capture all changes in an array group.
-        matches all full-sized arrays: the data arrays plus the inner loop
-        setpoint array
+        Find the save range that will joins all changes in an array group.
 
-        note: if an outer loop has changed values (without the inner
-        loop or measured data changing) we won't notice it here
+        Matches all full-sized arrays: the data arrays plus the inner loop
+        setpoint array.
 
-        use the inner setpoint as a base and look for differences
-        in last_saved_index and modified_range in the data arrays
+        Note: if an outer loop has changed values (without the inner
+        loop or measured data changing) we won't notice it here. We assume
+        that before an iteration of the inner loop starts, the outer loop
+        setpoint gets set and then does not change later.
 
-        if `only_complete` is True (default), will not mark any range to be
-        saved unless it contains no NaN values
+        Args:
+            group (Formatter.ArrayGroup): a ``namedtuple`` containing the
+                arrays that go together in one file, as tuple ``group.data``.
+
+            file_exists (bool): Does this file already exist? If True, and
+                all arrays in the group agree on ``last_saved_index``, we
+                assume the file has been written up to this index and we can
+                append to it. Otherwise we will set the returned range to start
+                from zero (so if the file does exist, it gets completely
+                overwritten).
+
+            only_complete (bool): Should we write all available new data,
+                or only complete rows? If True, we write only the range of
+                array indices which all arrays in the group list as modified,
+                so that future writes will be able to do a clean append to
+                the data file as more data arrives.
+                Default True.
+
+        Returns:
+            Tuple(int, int): the first and last raveled indices that should
+                be saved.
         """
         inner_setpoint = group.set_arrays[-1]
         full_dim_data = (inner_setpoint, ) + group.data
@@ -203,9 +252,21 @@ def _match_save_range_incremental(arrays, last_saved_index, only_complete):
 
     def group_arrays(self, arrays):
         """
-        find the sets of arrays which share all the same setpoint arrays
-        so each set can be grouped together into one file
-        returns ArrayGroup namedtuples
+        Find the sets of arrays which share all the same setpoint arrays.
+
+        Some Formatters use this grouping to determine which arrays to save
+        together in one file.
+
+        Args:
+            arrays (Dict[DataArray]): all the arrays in a DataSet
+
+        Returns:
+            List[Formatter.ArrayGroup]: namedtuples giving:
+                shape (Tuple[int]): dimensions as in numpy
+                set_arrays (Tuple[DataArray]): the setpoints of this group
+                data (Tuple[DataArray]): measured arrays in this group
+                name (str): a unique name of this group, obtained by joining
+                    the setpoint array ids.
         """
 
         set_array_sets = tuple(set(array.set_arrays