Lazy pandas import to improve startup time #604

Closed · wants to merge 8 commits
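The motivation for the change: `monty.json` previously did `import pandas as pd` at module level, so anything that imported monty paid the full pandas import cost even if no DataFrame or Series was ever serialized. A rough, non-authoritative way to see that cost on your own machine (assumes monty and pandas are installed; numbers vary and are not from the PR):

```python
import subprocess
import sys

# "-X importtime" is a standard CPython flag that prints a per-module
# import-time breakdown to stderr.
for module in ("monty.json", "pandas"):
    proc = subprocess.run(
        [sys.executable, "-X", "importtime", "-c", f"import {module}"],
        capture_output=True,
        text=True,
    )
    # Assuming the import succeeds, the last stderr line is the cumulative
    # time for the top-level module.
    print(module, proc.stderr.strip().splitlines()[-1])
```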
18 changes: 9 additions & 9 deletions monty/io.py
@@ -131,11 +131,11 @@ def reverse_readline(
         buf = ""
         m_file.seek(0, 2)
         if is_text:
-            lastchar = m_file.read(1)
+            last_char = m_file.read(1)
         else:
-            lastchar = m_file.read(1).decode("utf-8")
+            last_char = m_file.read(1).decode("utf-8")

-        trailing_newline = lastchar == "\n"
+        trailing_newline = last_char == "\n"

         while 1:
             newline_pos = buf.rfind("\n")
@@ -149,14 +149,14 @@ def reverse_readline(
                 yield line
             elif pos:
                 # Need to fill buffer
-                toread = min(blk_size, pos)
-                m_file.seek(pos - toread, 0)
+                to_read = min(blk_size, pos)
+                m_file.seek(pos - to_read, 0)
                 if is_text:
-                    buf = m_file.read(toread) + buf
+                    buf = m_file.read(to_read) + buf
                 else:
-                    buf = m_file.read(toread).decode("utf-8") + buf
-                m_file.seek(pos - toread, 0)
-                if pos == toread:
+                    buf = m_file.read(to_read).decode("utf-8") + buf
+                m_file.seek(pos - to_read, 0)
+                if pos == to_read:
                     buf = "\n" + buf
             else:
                 # Start-of-file
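The monty/io.py hunks only rename `lastchar`/`toread` to `last_char`/`to_read`; behaviour is unchanged. For context, a minimal usage sketch of `reverse_readline` (the file name is hypothetical):

```python
from monty.io import reverse_readline

# Read a file from the end without loading it all into memory; handy for
# grabbing the tail of a large log or output file.
with open("large_output.log") as f:  # hypothetical file
    for line in reverse_readline(f):
        print(line.rstrip())
        break  # only the last line is needed here
```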
33 changes: 19 additions & 14 deletions monty/json.py
@@ -19,11 +19,6 @@
 except ImportError:
     np = None  # type: ignore

-try:
-    import pandas as pd
-except ImportError:
-    pd = None  # type: ignore
-
 try:
     import pydantic
 except ImportError:
@@ -380,7 +375,9 @@
         if isinstance(o, np.generic):
             return o.item()

-        if pd is not None:
+        try:
+            import pandas as pd
+
             if isinstance(o, pd.DataFrame):
                 return {
                     "@module": "pandas",
@@ -393,6 +390,8 @@
                     "@class": "Series",
                     "data": o.to_json(default_handler=MontyEncoder().encode),
                 }
+        except ImportError:
+            pass

         if bson is not None:
             if isinstance(o, bson.objectid.ObjectId):

Codecov / codecov/patch warning: added lines monty/json.py#L393-L394 were not covered by tests.
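The encoder change above is the core of the PR: the module-level `import pandas as pd` is replaced by an import inside `default()`, so pandas is only loaded the first time a DataFrame or Series is actually encoded. A self-contained sketch of the same pattern (class and field names are illustrative, not monty's API):

```python
import json


class LazyPandasEncoder(json.JSONEncoder):
    """Illustrative encoder that defers the pandas import to first use."""

    def default(self, o):
        try:
            # Cheap after the first call: Python caches modules in sys.modules.
            import pandas as pd

            if isinstance(o, (pd.Series, pd.DataFrame)):
                return {"@class": type(o).__name__, "data": o.to_dict()}
        except ImportError:
            # pandas not installed: fall through to the standard TypeError.
            pass
        return super().default(o)
```

The first call that actually sees a pandas object pays the import once; later calls hit `sys.modules` and are effectively free, and callers who never touch pandas never import it.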
@@ -544,13 +543,18 @@
                         dtype=d["dtype"],
                     )
                 return np.array(d["data"], dtype=d["dtype"])
-            elif pd is not None and modname == "pandas":
-                if classname == "DataFrame":
-                    decoded_data = MontyDecoder().decode(d["data"])
-                    return pd.DataFrame(decoded_data)
-                if classname == "Series":
-                    decoded_data = MontyDecoder().decode(d["data"])
-                    return pd.Series(decoded_data)
+            elif modname == "pandas":
+                try:
+                    import pandas as pd
+
+                    if classname == "DataFrame":
+                        decoded_data = MontyDecoder().decode(d["data"])
+                        return pd.DataFrame(decoded_data)
+                    if classname == "Series":
+                        decoded_data = MontyDecoder().decode(d["data"])
+                        return pd.Series(decoded_data)
+                except ImportError:
+                    pass
             elif (
                 (bson is not None)
                 and modname == "bson.objectid"

Codecov / codecov/patch warning: added lines monty/json.py#L556-L557 were not covered by tests.
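With both the encoder and decoder branches in place, pandas objects still round-trip through MontyEncoder/MontyDecoder as before; only the import is deferred. A small sketch (requires monty and pandas; exact index dtypes after the round-trip depend on pandas' `to_json` defaults):

```python
import json

import pandas as pd
from monty.json import MontyDecoder, MontyEncoder

df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
text = json.dumps(df, cls=MontyEncoder)        # {"@module": "pandas", "@class": "DataFrame", ...}
restored = json.loads(text, cls=MontyDecoder)

assert isinstance(restored, pd.DataFrame)
assert restored["a"].tolist() == [1, 2]
```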
@@ -638,7 +642,8 @@
         ]
     if np is not None and isinstance(obj, np.generic):
         return obj.item()
-    if pd is not None and isinstance(obj, (pd.Series, pd.DataFrame)):
+    if callable(getattr(obj, "to_dict", None)):
+        # handle dataframes and series. used to check isinstance(obj, (pd.Series, pd.DataFrame))
         return obj.to_dict()
     if isinstance(obj, dict):
         return {
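The `jsanitize` change swaps the pandas `isinstance` check for duck typing: any object with a callable `to_dict()` is converted through it, so pandas never needs to be imported here, and other `to_dict`-providing objects now take the same path. A standalone sketch of that check (the class and function names are made up for illustration):

```python
def sanitize_via_to_dict(obj):
    # Mirrors the `callable(getattr(obj, "to_dict", None))` branch above.
    if callable(getattr(obj, "to_dict", None)):
        return obj.to_dict()
    return obj


class PointLike:
    """Any object exposing to_dict() now takes this path, pandas or not."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def to_dict(self):
        return {"x": self.x, "y": self.y}


print(sanitize_via_to_dict(PointLike(1, 2)))  # {'x': 1, 'y': 2}
print(sanitize_via_to_dict(3.14))             # unchanged: 3.14
```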
8 changes: 7 additions & 1 deletion monty/os/__init__.py
@@ -2,9 +2,15 @@
 Os functions, e.g., cd, makedirs_p.
 """

+from __future__ import annotations
+
 import errno
 import os
 from contextlib import contextmanager
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path

 __author__ = "Shyue Ping Ong"
 __copyright__ = "Copyright 2013, The Materials Project"

Codecov / codecov/patch warning: added line monty/os/__init__.py#L13 was not covered by tests.
@@ -15,7 +21,7 @@


 @contextmanager
-def cd(path):
+def cd(path: str | Path):
     """
     A Fabric-inspired cd context that temporarily changes directory for
     performing some tasks, and returns to the original working directory
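monty/os now annotates `cd()` as accepting `str | Path`, with the `Path` import guarded by `TYPE_CHECKING` so it costs nothing at runtime. Usage is unchanged; a short sketch (the target directory is arbitrary):

```python
from pathlib import Path

from monty.os import cd

with cd(Path("/tmp")):
    # Work relative to /tmp here; the previous working directory is restored
    # when the block exits, even if an exception is raised.
    print("inside the context")
```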
2 changes: 1 addition & 1 deletion tests/test_files/3000_lines.txt
@@ -2997,4 +2997,4 @@
2997
2998
2999
3000
3000
1 change: 0 additions & 1 deletion tests/test_files/myfile
@@ -1,2 +1 @@
HelloWorld.

1 change: 0 additions & 1 deletion tests/test_files/myfile_txt
@@ -1,2 +1 @@
HelloWorld.

18 changes: 9 additions & 9 deletions tests/test_io.py
@@ -28,8 +28,8 @@ def test_reverse_readline(self):
order, i.e. the first line that is read corresponds to the last line.
number
"""
with open(os.path.join(test_dir, "3000_lines.txt")) as f:
for idx, line in enumerate(reverse_readline(f)):
with open(os.path.join(test_dir, "3000_lines.txt")) as file:
for idx, line in enumerate(reverse_readline(file)):
assert (
int(line) == self.NUMLINES - idx
), "read_backwards read {} whereas it should "(
@@ -40,13 +40,13 @@ def test_reverse_readline_fake_big(self):
"""
Make sure that large textfiles are read properly
"""
with open(os.path.join(test_dir, "3000_lines.txt")) as f:
for idx, line in enumerate(reverse_readline(f, max_mem=0)):
with open(os.path.join(test_dir, "3000_lines.txt")) as file:
for idx, line in enumerate(reverse_readline(file, max_mem=0), -1):
if line == "\n":
continue
assert (
int(line) == self.NUMLINES - idx
), "read_backwards read {} whereas it should "(
"have read {" "}"
).format(int(line), self.NUMLINES - idx)
), f"read_backwards read {int(line)} whereas it should have read {self.NUMLINES - idx}"

def test_reverse_readline_bz2(self):
"""
@@ -80,7 +80,7 @@ def test_reverse_readfile(self):
number
"""
fname = os.path.join(test_dir, "3000_lines.txt")
for idx, line in enumerate(reverse_readfile(fname)):
for idx, line in enumerate(filter(bool, reverse_readfile(fname))):
assert int(line) == self.NUMLINES - idx

def test_reverse_readfile_gz(self):
@@ -127,7 +127,7 @@ def test_zopen(self):
with zopen(os.path.join(test_dir, "myfile_lzma.lzma"), "rt") as f:
assert f.read() == "HelloWorld.\n\n"
with zopen(os.path.join(test_dir, "myfile"), mode="rt") as f:
assert f.read() == "HelloWorld.\n\n"
assert f.read() == "HelloWorld.\n"

@unittest.skipIf(Path is None, "Not Py3k")
def test_Path_objects(self):
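The test updates account for the fact that reading a newline-terminated file backwards can now yield a newline-only item for the trailing newline, hence the `enumerate(..., -1)` offset, the `if line == "\n": continue` guard, and the `filter(bool, ...)` wrapper. A tiny illustration of the same effect using plain string splitting (not monty itself):

```python
content = "1\n2\n3\n"                  # newline-terminated, like 3000_lines.txt
backwards = content.split("\n")[::-1]  # ['', '3', '2', '1'] -- note the empty first item
non_blank = list(filter(bool, backwards))
print(backwards, non_blank)            # ['', '3', '2', '1'] ['3', '2', '1']
```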
74 changes: 37 additions & 37 deletions tests/test_json.py
@@ -158,13 +158,13 @@ def __init__(self, a, b):

self.auto_mson = AutoMSON

def test_to_from_dict(self):
def test_as_from_dict(self):
obj = self.good_cls("Hello", "World", "Python")
d = obj.as_dict()
assert d is not None
self.good_cls.from_dict(d)
jsonstr = obj.to_json()
d = json.loads(jsonstr)
json_str = obj.to_json()
d = json.loads(json_str)
assert d["@class"], "GoodMSONClass"
obj = self.bad_cls("Hello", "World")
d = obj.as_dict()
@@ -219,7 +219,7 @@ def test_version(self):
d = obj.as_dict()
assert d["@version"] == tests_version

def test_nested_to_from_dict(self):
def test_nested_as_from_dict(self):
GMC = GoodMSONClass
a_list = [GMC(1, 1.0, "one"), GMC(2, 2.0, "two")]
b_dict = {"first": GMC(3, 3.0, "three"), "second": GMC(4, 4.0, "four")}
@@ -313,28 +313,28 @@ def test_as_from_dict(self):

def test_torch(self):
t = torch.tensor([0, 1, 2])
jsonstr = json.dumps(t, cls=MontyEncoder)
t2 = json.loads(jsonstr, cls=MontyDecoder)
json_str = json.dumps(t, cls=MontyEncoder)
t2 = json.loads(json_str, cls=MontyDecoder)
assert isinstance(t2, torch.Tensor)
assert t2.type() == t.type()
assert np.array_equal(t2, t)
t = torch.tensor([1 + 1j, 2 + 1j])
jsonstr = json.dumps(t, cls=MontyEncoder)
t2 = json.loads(jsonstr, cls=MontyDecoder)
json_str = json.dumps(t, cls=MontyEncoder)
t2 = json.loads(json_str, cls=MontyDecoder)
assert isinstance(t2, torch.Tensor)
assert t2.type() == t.type()
assert np.array_equal(t2, t)

def test_datetime(self):
dt = datetime.datetime.now()
jsonstr = json.dumps(dt, cls=MontyEncoder)
d = json.loads(jsonstr, cls=MontyDecoder)
json_str = json.dumps(dt, cls=MontyEncoder)
d = json.loads(json_str, cls=MontyDecoder)
assert isinstance(d, datetime.datetime)
assert dt == d
# Test a nested datetime.
a = {"dt": dt, "a": 1}
jsonstr = json.dumps(a, cls=MontyEncoder)
d = json.loads(jsonstr, cls=MontyDecoder)
json_str = json.dumps(a, cls=MontyEncoder)
d = json.loads(json_str, cls=MontyDecoder)
assert isinstance(d["dt"], datetime.datetime)

jsanitize(dt, strict=True)
@@ -343,33 +343,33 @@ def test_uuid(self):
from uuid import UUID, uuid4

uuid = uuid4()
jsonstr = json.dumps(uuid, cls=MontyEncoder)
d = json.loads(jsonstr, cls=MontyDecoder)
json_str = json.dumps(uuid, cls=MontyEncoder)
d = json.loads(json_str, cls=MontyDecoder)
assert isinstance(d, UUID)
assert uuid == d
# Test a nested UUID.
a = {"uuid": uuid, "a": 1}
jsonstr = json.dumps(a, cls=MontyEncoder)
d = json.loads(jsonstr, cls=MontyDecoder)
json_str = json.dumps(a, cls=MontyEncoder)
d = json.loads(json_str, cls=MontyDecoder)
assert isinstance(d["uuid"], UUID)

def test_nan(self):
x = [float("NaN")]
djson = json.dumps(x, cls=MontyEncoder)
d = json.loads(djson)
dct_json = json.dumps(x, cls=MontyEncoder)
d = json.loads(dct_json)
assert isinstance(d[0], float)

def test_numpy(self):
x = np.array([1, 2, 3], dtype="int64")
with pytest.raises(TypeError):
json.dumps(x)
djson = json.dumps(x, cls=MontyEncoder)
d = json.loads(djson)
dct_json = json.dumps(x, cls=MontyEncoder)
d = json.loads(dct_json)
assert d["@class"] == "array"
assert d["@module"] == "numpy"
assert d["data"], [1, 2 == 3]
assert d["dtype"] == "int64"
x = json.loads(djson, cls=MontyDecoder)
x = json.loads(dct_json, cls=MontyDecoder)
assert isinstance(x, np.ndarray)
x = np.min([1, 2, 3]) > 2
with pytest.raises(TypeError):
@@ -378,26 +378,26 @@ def test_numpy(self):
x = np.array([1 + 1j, 2 + 1j, 3 + 1j], dtype="complex64")
with pytest.raises(TypeError):
json.dumps(x)
djson = json.dumps(x, cls=MontyEncoder)
d = json.loads(djson)
dct_json = json.dumps(x, cls=MontyEncoder)
d = json.loads(dct_json)
assert d["@class"] == "array"
assert d["@module"] == "numpy"
assert d["data"], [[1.0, 2.0, 3.0], [1.0, 1.0 == 1.0]]
assert d["dtype"] == "complex64"
x = json.loads(djson, cls=MontyDecoder)
x = json.loads(dct_json, cls=MontyDecoder)
assert isinstance(x, np.ndarray)
assert x.dtype == "complex64"

x = np.array([[1 + 1j, 2 + 1j], [3 + 1j, 4 + 1j]], dtype="complex64")
with pytest.raises(TypeError):
json.dumps(x)
djson = json.dumps(x, cls=MontyEncoder)
d = json.loads(djson)
dct_json = json.dumps(x, cls=MontyEncoder)
d = json.loads(dct_json)
assert d["@class"] == "array"
assert d["@module"] == "numpy"
assert d["data"], [[[1.0, 2.0], [3.0, 4.0]], [[1.0, 1.0], [1.0 == 1.0]]]
assert d["dtype"] == "complex64"
x = json.loads(djson, cls=MontyDecoder)
x = json.loads(dct_json, cls=MontyDecoder)
assert isinstance(x, np.ndarray)
assert x.dtype == "complex64"

@@ -489,22 +489,22 @@ def test_callable(self):
]:
with pytest.raises(TypeError):
json.dumps(function)
djson = json.dumps(function, cls=MontyEncoder)
d = json.loads(djson)
dct_json = json.dumps(function, cls=MontyEncoder)
d = json.loads(dct_json)
assert "@callable" in d
assert "@module" in d
x = json.loads(djson, cls=MontyDecoder)
x = json.loads(dct_json, cls=MontyDecoder)
assert x == function

# test method bound to instance
for function in [instance.method]:
with pytest.raises(TypeError):
json.dumps(function)
djson = json.dumps(function, cls=MontyEncoder)
d = json.loads(djson)
dct_json = json.dumps(function, cls=MontyEncoder)
d = json.loads(dct_json)
assert "@callable" in d
assert "@module" in d
x = json.loads(djson, cls=MontyDecoder)
x = json.loads(dct_json, cls=MontyDecoder)

# can't just check functions are equal as the instance the function is bound
# to will be different. Instead, we check that the serialized instance
Expand All @@ -519,15 +519,15 @@ def test_callable(self):

# test that callable MSONable objects still get serialized as the objects
# rather than as a callable
djson = json.dumps(instance, cls=MontyEncoder)
assert "@class" in djson
dct_json = json.dumps(instance, cls=MontyEncoder)
assert "@class" in dct_json

def test_objectid(self):
oid = ObjectId("562e8301218dcbbc3d7d91ce")
with pytest.raises(TypeError):
json.dumps(oid)
djson = json.dumps(oid, cls=MontyEncoder)
x = json.loads(djson, cls=MontyDecoder)
dct_json = json.dumps(oid, cls=MontyEncoder)
x = json.loads(dct_json, cls=MontyDecoder)
assert isinstance(x, ObjectId)

def test_jsanitize(self):
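Most of the tests/test_json.py diff is a mechanical rename (`jsonstr` to `json_str`, `djson` to `dct_json`); the behaviour under test is unchanged. For reference, a compact sketch of the kind of round-trip these tests exercise (requires monty and numpy; the payload is illustrative):

```python
import datetime
import json
from uuid import UUID, uuid4

import numpy as np
from monty.json import MontyDecoder, MontyEncoder

payload = {"when": datetime.datetime.now(), "id": uuid4(), "arr": np.arange(3)}
text = json.dumps(payload, cls=MontyEncoder)
back = json.loads(text, cls=MontyDecoder)

# Nested values come back as their original types, not plain dicts.
assert isinstance(back["when"], datetime.datetime)
assert isinstance(back["id"], UUID)
assert isinstance(back["arr"], np.ndarray)
```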