[export] Cover more cases to copy tensor conversions. (pytorch#125628)
Summary:
Previously we converted all .to() calls to aten._to_copy in the graph; a user now reports that other methods like .float() are not covered: pytorch/PiPPy#1104 (comment)

I think .float() should fundamentally look the same as .to() in export, so this diff expands coverage to the other tensor conversion methods.

Test Plan: buck run mode/opt caffe2/test:test_export -- -r float_conversion
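
For readers without buck, a rough standalone equivalent of the new test_float_conversion below is sketched here; it assumes a recent PyTorch build where torch.export.export is available (the in-tree test uses a test-local export helper instead):

import torch
from torch.export import export


class M(torch.nn.Module):
    def forward(self, x):
        return x.float()


ep = export(M(), (torch.tensor(1, dtype=torch.float),))
# With this change, the dtype conversion is kept as an explicit copy in the graph.
targets = [n.target for n in ep.graph.nodes if n.op == "call_function"]
assert targets and all(t == torch.ops.aten._to_copy.default for t in targets)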

Differential Revision: D56951634

Pull Request resolved: pytorch#125628
Approved by: https://github.com/tugsbayasgalan
zhxchen17 authored and ZelboK committed May 19, 2024
1 parent cd60801 commit a48463e
Showing 2 changed files with 65 additions and 0 deletions.
26 changes: 26 additions & 0 deletions test/export/test_export.py
@@ -2097,6 +2097,32 @@ def forward(self, x):
):
export(Module(), (torch.tensor(1, device="cpu"),))

def test_float_conversion(self):
class Module(torch.nn.Module):
def forward(self, x):
return x.float()

ep = export(Module(), (torch.tensor(1, dtype=torch.float),))
ops = []
for node in ep.graph.nodes:
if node.op == "call_function":
ops.append(node.target)
self.assertGreater(len(ops), 0)
for op in ops:
self.assertIn(op, (torch.ops.aten._to_copy.default,))

def test_device_to_mutation_float(self):
class Module(torch.nn.Module):
def forward(self, x):
y = x.float()
y.add_(1)
return y, x

with self.assertRaisesRegex(
RuntimeError, "cannot mutate tensors with frozen storage"
):
export(Module(), (torch.tensor(1, dtype=torch.float),))

def test_module(self):
class MyLinear(torch.nn.Module):
def __init__(self):
39 changes: 39 additions & 0 deletions torch/_subclasses/functional_tensor.py
@@ -17,6 +17,27 @@
not_implemented_log = torch._logging.getArtifactLogger(__name__, "not_implemented")


# NOTE: Some special handling for tensor conversion during export is needed.
# Normally, when tracing through the model with tensor.to(), the maybe-aliasing
# relationship between input and output tensors will be baked into the graph.
# For example, if we get a tensor on CPU and call tensor.to("cpu"), it becomes
# a no-op in the graph. For a whole-graph capture this is not sound, so we need
# to do something different. Instead, in export we preserve the tensor
# conversion by forcing a non-semantic-breaking aten::_to_copy operator to be
# traced in the graph, and we subsequently ban mutations on all such converted
# tensors.
# In addition to patching the .to() method call in functionalization, we also
# have to patch similar methods like float() and cpu(), because they
# intentionally don't fall back to .to(), yet have the same behavior as .to()
# per the PyTorch documentation:
# https://pytorch.org/docs/stable/generated/torch.Tensor.float.html
# Thus we simply force them to go through a .to() call.
def _conversion_method_template(**extra_kwargs):
def _(self, *args, **kwargs):
return self.to(*args, **{**kwargs, **extra_kwargs})

return _


class FunctionalTensor(torch.Tensor):
"""
Functional tensors represent tensors that will remove mutations
@@ -225,6 +246,24 @@ def to(self, *args, **kwargs):
return super().to(*args, **{**kwargs, "copy": True})
return super().to(*args, **kwargs)

def cuda(self, device=None, *args, **kwargs):
device = device or torch.cuda.current_device()
if len(args) > 0:
return self.to(device, *args, **kwargs)
else:
return self.to(device=device, **kwargs)

char = _conversion_method_template(dtype=torch.int8)
cpu = _conversion_method_template(device=torch.device("cpu"))
bfloat16 = _conversion_method_template(dtype=torch.bfloat16)
byte = _conversion_method_template(dtype=torch.uint8)
double = _conversion_method_template(dtype=torch.float64)
float = _conversion_method_template(dtype=torch.float32)
bool = _conversion_method_template(dtype=torch.bool)
half = _conversion_method_template(dtype=torch.float16)
int = _conversion_method_template(dtype=torch.int32)
long = _conversion_method_template(dtype=torch.int64)


class FunctionalTensorMode(TorchDispatchMode):
def __init__(self, pre_dispatch=False, export=False, _allow_token_discovery=False):
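
An aside on the _conversion_method_template helper added above: because it merges keywords as {**kwargs, **extra_kwargs}, the dtype/device fixed at definition time wins over any same-named keyword from the caller, while positional arguments pass through to .to() untouched. A minimal sketch of that merge behavior, using a hypothetical Dummy class that just echoes what .to() receives:

import torch


def _conversion_method_template(**extra_kwargs):
    def _(self, *args, **kwargs):
        return self.to(*args, **{**kwargs, **extra_kwargs})

    return _


class Dummy:
    def to(self, *args, **kwargs):
        # Echo the arguments .to() would receive.
        return args, kwargs

    float = _conversion_method_template(dtype=torch.float32)
    cpu = _conversion_method_template(device=torch.device("cpu"))


d = Dummy()
print(d.float())                  # ((), {'dtype': torch.float32})
print(d.float(dtype=torch.int8))  # template's dtype wins: ((), {'dtype': torch.float32})
print(d.cpu())                    # ((), {'device': device(type='cpu')})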
