From 7b564580f5836fdaa3113148670ed56648a17e9d Mon Sep 17 00:00:00 2001
From: Janosh Riebesell <janosh.riebesell@gmail.com>
Date: Tue, 23 Jan 2024 07:34:24 +0100
Subject: [PATCH 1/8] lazy import pandas to improve startup time

---
 monty/json.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/monty/json.py b/monty/json.py
index 8440a4225..f0ab1de0a 100644
--- a/monty/json.py
+++ b/monty/json.py
@@ -19,11 +19,6 @@
 except ImportError:
     np = None  # type: ignore
 
-try:
-    import pandas as pd
-except ImportError:
-    pd = None  # type: ignore
-
 try:
     import pydantic
 except ImportError:
@@ -380,7 +375,9 @@ def default(self, o) -> dict:  # pylint: disable=E0202
             if isinstance(o, np.generic):
                 return o.item()
 
-        if pd is not None:
+        try:
+            import pandas as pd
+
             if isinstance(o, pd.DataFrame):
                 return {
                     "@module": "pandas",
@@ -393,6 +390,8 @@ def default(self, o) -> dict:  # pylint: disable=E0202
                     "@class": "Series",
                     "data": o.to_json(default_handler=MontyEncoder().encode),
                 }
+        except ImportError:
+            pass
 
         if bson is not None:
             if isinstance(o, bson.objectid.ObjectId):

From 1540b4237eddda04c92ca345aecf94f6167737af Mon Sep 17 00:00:00 2001
From: Janosh Riebesell <janosh.riebesell@gmail.com>
Date: Tue, 23 Jan 2024 07:35:17 +0100
Subject: [PATCH 2/8] add Path to cd() contextmanager type hint

---
 monty/os/__init__.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/monty/os/__init__.py b/monty/os/__init__.py
index edc48e23a..d3d8574e2 100644
--- a/monty/os/__init__.py
+++ b/monty/os/__init__.py
@@ -5,6 +5,10 @@
 import errno
 import os
 from contextlib import contextmanager
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
 
 __author__ = "Shyue Ping Ong"
 __copyright__ = "Copyright 2013, The Materials Project"
@@ -15,7 +19,7 @@
 
 
 @contextmanager
-def cd(path):
+def cd(path: str | Path):
     """
     A Fabric-inspired cd context that temporarily changes directory for
     performing some tasks, and returns to the original working directory

From ec78d77de2b0d3a011188f87928025b006c142f9 Mon Sep 17 00:00:00 2001
From: Janosh Riebesell <janosh.riebesell@gmail.com>
Date: Tue, 23 Jan 2024 07:35:56 +0100
Subject: [PATCH 3/8] snake_case

---
 tests/test_json.py | 74 +++++++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/tests/test_json.py b/tests/test_json.py
index 985db583e..a7859d426 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -158,13 +158,13 @@ def __init__(self, a, b):
 
         self.auto_mson = AutoMSON
 
-    def test_to_from_dict(self):
+    def test_as_from_dict(self):
         obj = self.good_cls("Hello", "World", "Python")
         d = obj.as_dict()
         assert d is not None
         self.good_cls.from_dict(d)
-        jsonstr = obj.to_json()
-        d = json.loads(jsonstr)
+        json_str = obj.to_json()
+        d = json.loads(json_str)
         assert d["@class"], "GoodMSONClass"
         obj = self.bad_cls("Hello", "World")
         d = obj.as_dict()
@@ -219,7 +219,7 @@ def test_version(self):
         d = obj.as_dict()
         assert d["@version"] == tests_version
 
-    def test_nested_to_from_dict(self):
+    def test_nested_as_from_dict(self):
         GMC = GoodMSONClass
         a_list = [GMC(1, 1.0, "one"), GMC(2, 2.0, "two")]
         b_dict = {"first": GMC(3, 3.0, "three"), "second": GMC(4, 4.0, "four")}
@@ -313,28 +313,28 @@ def test_as_from_dict(self):
 
     def test_torch(self):
         t = torch.tensor([0, 1, 2])
-        jsonstr = json.dumps(t, cls=MontyEncoder)
-        t2 = json.loads(jsonstr, cls=MontyDecoder)
+        json_str = json.dumps(t, cls=MontyEncoder)
+        t2 = json.loads(json_str, cls=MontyDecoder)
         assert isinstance(t2, torch.Tensor)
         assert t2.type() == t.type()
         assert np.array_equal(t2, t)
         t = torch.tensor([1 + 1j, 2 + 1j])
-        jsonstr = json.dumps(t, cls=MontyEncoder)
-        t2 = json.loads(jsonstr, cls=MontyDecoder)
+        json_str = json.dumps(t, cls=MontyEncoder)
+        t2 = json.loads(json_str, cls=MontyDecoder)
         assert isinstance(t2, torch.Tensor)
         assert t2.type() == t.type()
         assert np.array_equal(t2, t)
 
     def test_datetime(self):
         dt = datetime.datetime.now()
-        jsonstr = json.dumps(dt, cls=MontyEncoder)
-        d = json.loads(jsonstr, cls=MontyDecoder)
+        json_str = json.dumps(dt, cls=MontyEncoder)
+        d = json.loads(json_str, cls=MontyDecoder)
         assert isinstance(d, datetime.datetime)
         assert dt == d
         # Test a nested datetime.
         a = {"dt": dt, "a": 1}
-        jsonstr = json.dumps(a, cls=MontyEncoder)
-        d = json.loads(jsonstr, cls=MontyDecoder)
+        json_str = json.dumps(a, cls=MontyEncoder)
+        d = json.loads(json_str, cls=MontyDecoder)
         assert isinstance(d["dt"], datetime.datetime)
 
         jsanitize(dt, strict=True)
@@ -343,33 +343,33 @@ def test_uuid(self):
         from uuid import UUID, uuid4
 
         uuid = uuid4()
-        jsonstr = json.dumps(uuid, cls=MontyEncoder)
-        d = json.loads(jsonstr, cls=MontyDecoder)
+        json_str = json.dumps(uuid, cls=MontyEncoder)
+        d = json.loads(json_str, cls=MontyDecoder)
         assert isinstance(d, UUID)
         assert uuid == d
         # Test a nested UUID.
         a = {"uuid": uuid, "a": 1}
-        jsonstr = json.dumps(a, cls=MontyEncoder)
-        d = json.loads(jsonstr, cls=MontyDecoder)
+        json_str = json.dumps(a, cls=MontyEncoder)
+        d = json.loads(json_str, cls=MontyDecoder)
         assert isinstance(d["uuid"], UUID)
 
     def test_nan(self):
         x = [float("NaN")]
-        djson = json.dumps(x, cls=MontyEncoder)
-        d = json.loads(djson)
+        dct_json = json.dumps(x, cls=MontyEncoder)
+        d = json.loads(dct_json)
         assert isinstance(d[0], float)
 
     def test_numpy(self):
         x = np.array([1, 2, 3], dtype="int64")
         with pytest.raises(TypeError):
             json.dumps(x)
-        djson = json.dumps(x, cls=MontyEncoder)
-        d = json.loads(djson)
+        dct_json = json.dumps(x, cls=MontyEncoder)
+        d = json.loads(dct_json)
         assert d["@class"] == "array"
         assert d["@module"] == "numpy"
         assert d["data"], [1, 2 == 3]
         assert d["dtype"] == "int64"
-        x = json.loads(djson, cls=MontyDecoder)
+        x = json.loads(dct_json, cls=MontyDecoder)
         assert isinstance(x, np.ndarray)
         x = np.min([1, 2, 3]) > 2
         with pytest.raises(TypeError):
@@ -378,26 +378,26 @@ def test_numpy(self):
         x = np.array([1 + 1j, 2 + 1j, 3 + 1j], dtype="complex64")
         with pytest.raises(TypeError):
             json.dumps(x)
-        djson = json.dumps(x, cls=MontyEncoder)
-        d = json.loads(djson)
+        dct_json = json.dumps(x, cls=MontyEncoder)
+        d = json.loads(dct_json)
         assert d["@class"] == "array"
         assert d["@module"] == "numpy"
         assert d["data"], [[1.0, 2.0, 3.0], [1.0, 1.0 == 1.0]]
         assert d["dtype"] == "complex64"
-        x = json.loads(djson, cls=MontyDecoder)
+        x = json.loads(dct_json, cls=MontyDecoder)
         assert isinstance(x, np.ndarray)
         assert x.dtype == "complex64"
 
         x = np.array([[1 + 1j, 2 + 1j], [3 + 1j, 4 + 1j]], dtype="complex64")
         with pytest.raises(TypeError):
             json.dumps(x)
-        djson = json.dumps(x, cls=MontyEncoder)
-        d = json.loads(djson)
+        dct_json = json.dumps(x, cls=MontyEncoder)
+        d = json.loads(dct_json)
         assert d["@class"] == "array"
         assert d["@module"] == "numpy"
         assert d["data"], [[[1.0, 2.0], [3.0, 4.0]], [[1.0, 1.0], [1.0 == 1.0]]]
         assert d["dtype"] == "complex64"
-        x = json.loads(djson, cls=MontyDecoder)
+        x = json.loads(dct_json, cls=MontyDecoder)
         assert isinstance(x, np.ndarray)
         assert x.dtype == "complex64"
 
@@ -489,22 +489,22 @@ def test_callable(self):
         ]:
             with pytest.raises(TypeError):
                 json.dumps(function)
-            djson = json.dumps(function, cls=MontyEncoder)
-            d = json.loads(djson)
+            dct_json = json.dumps(function, cls=MontyEncoder)
+            d = json.loads(dct_json)
             assert "@callable" in d
             assert "@module" in d
-            x = json.loads(djson, cls=MontyDecoder)
+            x = json.loads(dct_json, cls=MontyDecoder)
             assert x == function
 
         # test method bound to instance
         for function in [instance.method]:
             with pytest.raises(TypeError):
                 json.dumps(function)
-            djson = json.dumps(function, cls=MontyEncoder)
-            d = json.loads(djson)
+            dct_json = json.dumps(function, cls=MontyEncoder)
+            d = json.loads(dct_json)
             assert "@callable" in d
             assert "@module" in d
-            x = json.loads(djson, cls=MontyDecoder)
+            x = json.loads(dct_json, cls=MontyDecoder)
 
             # can't just check functions are equal as the instance the function is bound
             # to will be different. Instead, we check that the serialized instance
@@ -519,15 +519,15 @@ def test_callable(self):
 
         # test that callable MSONable objects still get serialized as the objects
         # rather than as a callable
-        djson = json.dumps(instance, cls=MontyEncoder)
-        assert "@class" in djson
+        dct_json = json.dumps(instance, cls=MontyEncoder)
+        assert "@class" in dct_json
 
     def test_objectid(self):
         oid = ObjectId("562e8301218dcbbc3d7d91ce")
         with pytest.raises(TypeError):
             json.dumps(oid)
-        djson = json.dumps(oid, cls=MontyEncoder)
-        x = json.loads(djson, cls=MontyDecoder)
+        dct_json = json.dumps(oid, cls=MontyEncoder)
+        x = json.loads(dct_json, cls=MontyDecoder)
         assert isinstance(x, ObjectId)
 
     def test_jsanitize(self):

From a4ff19ccc3dcf4773c681c1be53def21635c16fe Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 23 Jan 2024 06:43:42 +0000
Subject: [PATCH 4/8] pre-commit auto-fixes

---
 tests/test_files/3000_lines.txt | 2 +-
 tests/test_files/myfile         | 1 -
 tests/test_files/myfile_txt     | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/test_files/3000_lines.txt b/tests/test_files/3000_lines.txt
index 2f8c055bc..1127304b4 100644
--- a/tests/test_files/3000_lines.txt
+++ b/tests/test_files/3000_lines.txt
@@ -2997,4 +2997,4 @@
 2997
 2998
 2999
-3000
\ No newline at end of file
+3000
diff --git a/tests/test_files/myfile b/tests/test_files/myfile
index 1e35ee2e0..459b7cc69 100644
--- a/tests/test_files/myfile
+++ b/tests/test_files/myfile
@@ -1,2 +1 @@
 HelloWorld.
-
diff --git a/tests/test_files/myfile_txt b/tests/test_files/myfile_txt
index 1e35ee2e0..459b7cc69 100644
--- a/tests/test_files/myfile_txt
+++ b/tests/test_files/myfile_txt
@@ -1,2 +1 @@
 HelloWorld.
-

From 3025a1a9296ae68d528db5a50c6885f7cdc1e454 Mon Sep 17 00:00:00 2001
From: Janosh Riebesell <janosh.riebesell@gmail.com>
Date: Tue, 23 Jan 2024 07:50:50 +0100
Subject: [PATCH 5/8] future import annotations in monty/os/__init__.py

---
 monty/os/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/monty/os/__init__.py b/monty/os/__init__.py
index d3d8574e2..0568d7f05 100644
--- a/monty/os/__init__.py
+++ b/monty/os/__init__.py
@@ -2,6 +2,8 @@
 Os functions, e.g., cd, makedirs_p.
 """
 
+from __future__ import annotations
+
 import errno
 import os
 from contextlib import contextmanager

From 1376c32131610b3e75d130f21b05e46978eaec63 Mon Sep 17 00:00:00 2001
From: Janosh Riebesell <janosh.riebesell@gmail.com>
Date: Tue, 23 Jan 2024 07:55:37 +0100
Subject: [PATCH 6/8] add missing pandas lazy import

---
 monty/json.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/monty/json.py b/monty/json.py
index f0ab1de0a..13c6c3df4 100644
--- a/monty/json.py
+++ b/monty/json.py
@@ -543,13 +543,18 @@ def process_decoded(self, d):
                             dtype=d["dtype"],
                         )
                     return np.array(d["data"], dtype=d["dtype"])
-                elif pd is not None and modname == "pandas":
-                    if classname == "DataFrame":
-                        decoded_data = MontyDecoder().decode(d["data"])
-                        return pd.DataFrame(decoded_data)
-                    if classname == "Series":
-                        decoded_data = MontyDecoder().decode(d["data"])
-                        return pd.Series(decoded_data)
+                elif modname == "pandas":
+                    try:
+                        import pandas as pd
+
+                        if classname == "DataFrame":
+                            decoded_data = MontyDecoder().decode(d["data"])
+                            return pd.DataFrame(decoded_data)
+                        if classname == "Series":
+                            decoded_data = MontyDecoder().decode(d["data"])
+                            return pd.Series(decoded_data)
+                    except ImportError:
+                        pass
                 elif (
                     (bson is not None)
                     and modname == "bson.objectid"

From 2724b05652140430f95f946dd2ebd5937d60500f Mon Sep 17 00:00:00 2001
From: Janosh Riebesell <janosh.riebesell@gmail.com>
Date: Tue, 23 Jan 2024 08:00:27 +0100
Subject: [PATCH 7/8] change how jsanitize function handles dataframes and
 series

---
 monty/json.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/monty/json.py b/monty/json.py
index 13c6c3df4..eb967680d 100644
--- a/monty/json.py
+++ b/monty/json.py
@@ -642,7 +642,8 @@ def jsanitize(
         ]
     if np is not None and isinstance(obj, np.generic):
         return obj.item()
-    if pd is not None and isinstance(obj, (pd.Series, pd.DataFrame)):
+    if callable(getattr(obj, "to_dict", None)):
+        # dataframes and series (duck-typed). previously: isinstance(obj, (pd.Series, pd.DataFrame))
         return obj.to_dict()
     if isinstance(obj, dict):
         return {

From 127133cca07172a61419a39dfd5fa302c9af7c95 Mon Sep 17 00:00:00 2001
From: Janosh Riebesell <janosh.riebesell@gmail.com>
Date: Tue, 23 Jan 2024 08:30:06 +0100
Subject: [PATCH 8/8] fix tests

---
 monty/io.py      | 18 +++++++++---------
 tests/test_io.py | 18 +++++++++---------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/monty/io.py b/monty/io.py
index df7f1ffdc..097ca2730 100644
--- a/monty/io.py
+++ b/monty/io.py
@@ -131,11 +131,11 @@ def reverse_readline(
         buf = ""
         m_file.seek(0, 2)
         if is_text:
-            lastchar = m_file.read(1)
+            last_char = m_file.read(1)
         else:
-            lastchar = m_file.read(1).decode("utf-8")
+            last_char = m_file.read(1).decode("utf-8")
 
-        trailing_newline = lastchar == "\n"
+        trailing_newline = last_char == "\n"
 
         while 1:
             newline_pos = buf.rfind("\n")
@@ -149,14 +149,14 @@ def reverse_readline(
                 yield line
             elif pos:
                 # Need to fill buffer
-                toread = min(blk_size, pos)
-                m_file.seek(pos - toread, 0)
+                to_read = min(blk_size, pos)
+                m_file.seek(pos - to_read, 0)
                 if is_text:
-                    buf = m_file.read(toread) + buf
+                    buf = m_file.read(to_read) + buf
                 else:
-                    buf = m_file.read(toread).decode("utf-8") + buf
-                m_file.seek(pos - toread, 0)
-                if pos == toread:
+                    buf = m_file.read(to_read).decode("utf-8") + buf
+                m_file.seek(pos - to_read, 0)
+                if pos == to_read:
                     buf = "\n" + buf
             else:
                 # Start-of-file
diff --git a/tests/test_io.py b/tests/test_io.py
index 043e67440..4f37a979e 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -28,8 +28,8 @@ def test_reverse_readline(self):
         order, i.e. the first line that is read corresponds to the last line.
         number
         """
-        with open(os.path.join(test_dir, "3000_lines.txt")) as f:
-            for idx, line in enumerate(reverse_readline(f)):
+        with open(os.path.join(test_dir, "3000_lines.txt")) as file:
+            for idx, line in enumerate(reverse_readline(file)):
                 assert (
                     int(line) == self.NUMLINES - idx
                 ), "read_backwards read {} whereas it should "(
@@ -40,13 +40,13 @@ def test_reverse_readline_fake_big(self):
         """
         Make sure that large textfiles are read properly
         """
-        with open(os.path.join(test_dir, "3000_lines.txt")) as f:
-            for idx, line in enumerate(reverse_readline(f, max_mem=0)):
+        with open(os.path.join(test_dir, "3000_lines.txt")) as file:
+            for idx, line in enumerate(reverse_readline(file, max_mem=0), -1):
+                if line == "\n":
+                    continue
                 assert (
                     int(line) == self.NUMLINES - idx
-                ), "read_backwards read {} whereas it should "(
-                    "have read {" "}"
-                ).format(int(line), self.NUMLINES - idx)
+                ), f"read_backwards read {int(line)} whereas it should have read {self.NUMLINES - idx}"
 
     def test_reverse_readline_bz2(self):
         """
@@ -80,7 +80,7 @@ def test_reverse_readfile(self):
         number
         """
         fname = os.path.join(test_dir, "3000_lines.txt")
-        for idx, line in enumerate(reverse_readfile(fname)):
+        for idx, line in enumerate(filter(bool, reverse_readfile(fname))):
             assert int(line) == self.NUMLINES - idx
 
     def test_reverse_readfile_gz(self):
@@ -127,7 +127,7 @@ def test_zopen(self):
         with zopen(os.path.join(test_dir, "myfile_lzma.lzma"), "rt") as f:
             assert f.read() == "HelloWorld.\n\n"
         with zopen(os.path.join(test_dir, "myfile"), mode="rt") as f:
-            assert f.read() == "HelloWorld.\n\n"
+            assert f.read() == "HelloWorld.\n"
 
     @unittest.skipIf(Path is None, "Not Py3k")
     def test_Path_objects(self):