From 558f31f10232cb2635d07ca1887e5bd5385422c6 Mon Sep 17 00:00:00 2001
From: Brandt Bucher <brandtbucher@microsoft.com>
Date: Thu, 16 Jun 2022 13:49:32 -0700
Subject: [PATCH] GH-91389: Fix dis position information for CACHEs (GH-93663)

(cherry picked from commit f8e576be0a7cd38f753f31cf4178db81a602fc32)
---
 Lib/dis.py                                    | 32 +++++++++++------
 Lib/test/test_dis.py                          | 36 +++++++++++++++++--
 ...2-06-09-17-15-26.gh-issue-91389.OE4vS5.rst |  2 ++
 3 files changed, 58 insertions(+), 12 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2022-06-09-17-15-26.gh-issue-91389.OE4vS5.rst

diff --git a/Lib/dis.py b/Lib/dis.py
index 5a5ee8d848d3a4..5ab830af54c621 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -492,17 +492,29 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
         yield Instruction(_all_opname[op], op,
                           arg, argval, argrepr,
                           offset, starts_line, is_jump_target, positions)
-        if show_caches and _inline_cache_entries[deop]:
-            for name, caches in _cache_format[opname[deop]].items():
-                data = code[offset + 2: offset + 2 + caches * 2]
-                argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
-                for _ in range(caches):
-                    offset += 2
-                    yield Instruction(
-                        "CACHE", 0, 0, None, argrepr, offset, None, False, None
-                    )
-                    # Only show the actual value for the first cache entry:
+        caches = _inline_cache_entries[deop]
+        if not caches:
+            continue
+        if not show_caches:
+            # We still need to advance the co_positions iterator:
+            for _ in range(caches):
+                next(co_positions, ())
+            continue
+        for name, size in _cache_format[opname[deop]].items():
+            for i in range(size):
+                offset += 2
+                # Only show the fancy argrepr for a CACHE instruction when it's
+                # the first entry for a particular cache value and the
+                # instruction using it is actually quickened:
+                if i == 0 and op != deop:
+                    data = code[offset: offset + 2 * size]
+                    argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
+                else:
                     argrepr = ""
+                yield Instruction(
+                    "CACHE", CACHE, 0, None, argrepr, offset, None, False,
+                    Positions(*next(co_positions, ()))
+                )
 
 def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False):
     """Disassemble a code object."""
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
index edcac8ee2b7c53..e3e4a3768e7bd3 100644
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -1060,8 +1060,10 @@ def test_show_caches(self):
                     caches = list(self.get_cached_values(quickened, adaptive))
                     for cache in caches:
                         self.assertRegex(cache, pattern)
-                    self.assertEqual(caches.count(""), 8)
-                    self.assertEqual(len(caches), 25)
+                    total_caches = 25
+                    empty_caches = 8 if adaptive and quickened else total_caches
+                    self.assertEqual(caches.count(""), empty_caches)
+                    self.assertEqual(len(caches), total_caches)
 
 
 class DisWithFileTests(DisTests):
@@ -1629,6 +1631,36 @@ def test_co_positions_missing_info(self):
                 self.assertIsNone(positions.col_offset)
                 self.assertIsNone(positions.end_col_offset)
 
+    @requires_debug_ranges()
+    def test_co_positions_with_lots_of_caches(self):
+        def roots(a, b, c):
+            d = b**2 - 4 * a * c
+            yield (-b - cmath.sqrt(d)) / (2 * a)
+            if d:
+                yield (-b + cmath.sqrt(d)) / (2 * a)
+        code = roots.__code__
+        ops = code.co_code[::2]
+        cache_opcode = opcode.opmap["CACHE"]
+        caches = sum(op == cache_opcode for op in ops)
+        non_caches = len(ops) - caches
+        # Make sure we have "lots of caches". If not, roots should be changed:
+        assert 1 / 3 <= caches / non_caches, "this test needs more caches!"
+        for show_caches in (False, True):
+            for adaptive in (False, True):
+                with self.subTest(f"{adaptive=}, {show_caches=}"):
+                    co_positions = [
+                        positions
+                        for op, positions in zip(ops, code.co_positions(), strict=True)
+                        if show_caches or op != cache_opcode
+                    ]
+                    dis_positions = [
+                        instruction.positions
+                        for instruction in dis.get_instructions(
+                            code, adaptive=adaptive, show_caches=show_caches
+                        )
+                    ]
+                    self.assertEqual(co_positions, dis_positions)
+
 # get_instructions has its own tests above, so can rely on it to validate
 # the object oriented API
 class BytecodeTests(InstructionTestCase, DisTestBase):
diff --git a/Misc/NEWS.d/next/Library/2022-06-09-17-15-26.gh-issue-91389.OE4vS5.rst b/Misc/NEWS.d/next/Library/2022-06-09-17-15-26.gh-issue-91389.OE4vS5.rst
new file mode 100644
index 00000000000000..0a126551e4110b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-06-09-17-15-26.gh-issue-91389.OE4vS5.rst
@@ -0,0 +1,2 @@
+Fix an issue where :mod:`dis` utilities could report missing or incorrect
+position information in the presence of ``CACHE`` entries.