Skip to content

Commit

Permalink
#79 Code Test +5
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanaXu committed Nov 12, 2022
1 parent bc3188b commit 01a57a2
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 39 deletions.
39 changes: 38 additions & 1 deletion pro/DeepRec/tensorflow/compiler/xla/xla.bzl
Original file line number Diff line number Diff line change
@@ -1,4 +1,37 @@
"""Wrapper around proto libraries used inside the XLA codebase."""
"""Wrapper around cc_proto_library used inside the XLA codebase."""

load(
"//tensorflow/core/platform:default/build_config.bzl",
"cc_proto_library",
)
load(
"//tensorflow/core/platform:default/build_config_root.bzl",
"if_static",
)
load(
"//tensorflow/core/platform:default/cuda_build_defs.bzl",
"if_cuda_is_configured",
)

# xla_proto_library() is a convenience wrapper around cc_proto_library.
def xla_proto_library(name, srcs = [], deps = [], visibility = None, testonly = 0, **kwargs):
    """Convenience wrapper around cc_proto_library for XLA proto targets.

    Forwards to cc_proto_library, adding the protobuf well-known types to
    `deps`, selecting the static or headers-only protobuf runtime, and
    enabling the gRPC namespace whenever the gRPC plugin is requested.
    """
    forwarded = dict(kwargs)

    # When the gRPC plugin is on, also generate code under the grpc namespace.
    if forwarded.get("use_grpc_plugin"):
        forwarded["use_grpc_namespace"] = True

    # The well-known-proto dep is the only way for xla_proto_library users
    # to reach google.protobuf.{Any,Duration,...}.
    all_deps = deps + ["@com_google_protobuf//:cc_wkt_protos"]

    cc_proto_library(
        name = name,
        srcs = srcs,
        deps = all_deps,
        cc_libs = if_static(
            ["@com_google_protobuf//:protobuf"],
            otherwise = ["@com_google_protobuf//:protobuf_headers"],
        ),
        protoc = "@com_google_protobuf//:protoc",
        testonly = testonly,
        visibility = visibility,
        **forwarded
    )

def xla_py_proto_library(**kwargs):
# Note: we don't currently define a proto library target for Python in OSS.
Expand All @@ -12,5 +45,9 @@ def xla_py_grpc_library(**kwargs):

ORC_JIT_MEMORY_MAPPER_TARGETS = []

# We link the GPU plugin into the XLA Python extension if CUDA is enabled.
def xla_python_default_plugins():
    """Returns the GPU plugin dep for the XLA Python extension when CUDA is configured, else nothing."""
    gpu_plugin = ["//tensorflow/compiler/xla/service:gpu_plugin"]
    return if_cuda_is_configured(gpu_plugin)

def xla_py_test_deps():
    """Extra dependencies for XLA Python tests; none are needed in the OSS build."""
    deps = []
    return deps
84 changes: 48 additions & 36 deletions pro/DeepRec/tensorflow/compiler/xla/xla.proto
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,44 @@ package xla;
import "tensorflow/compiler/xla/service/hlo.proto";
import "tensorflow/compiler/xla/xla_data.proto";

// Options for the HLO insert-reduce-precision-operations pass.
message HloReducePrecisionOptions {
  // Where and when the reduce-precision operations will be added.
  enum Location {
    // Add reduce-precision operations to the inputs of selected instructions.
    // This is done before any optimization occurs.
    OP_INPUTS = 0;
    // Add reduce-precision operations to the outputs of selected instructions.
    // This is done before any optimization occurs.
    OP_OUTPUTS = 1;
    // After operation-fusion occurs, add reduce-precision operations to the
    // outputs of any selected instructions that have not been fused into
    // fusion instructions.
    UNFUSED_OP_OUTPUTS = 2;
    // After operation-fusion occurs, add reduce-precision operations to the
    // inputs of any fusion instructions that contain operations matching the
    // selection criteria.
    FUSION_INPUTS_BY_CONTENT = 3;
    // After operation-fusion occurs, add reduce-precision operations to the
    // outputs of any fusion instructions that contain operations matching the
    // selection criteria.
    FUSION_OUTPUTS_BY_CONTENT = 4;
  }
  // Which of the insertion points above this pass instance targets.
  Location location = 1;

  // Exponent and mantissa bit counts for the reduced precision.
  uint32 exponent_bits = 2;
  uint32 mantissa_bits = 3;

  // Operations matching these opcodes should be suffixed with reduce-precision
  // operations.
  repeated uint32 opcodes_to_suffix = 4;

  // Operations with names containing these substrings should be suffixed with
  // reduce-precision operations.
  repeated string opname_substrings_to_suffix = 5;
}

// Debugging options for XLA. These options may change at any time - there are
// no guarantees about backward or forward compatibility for these fields.
message DebugOptions {
Expand Down Expand Up @@ -84,7 +122,10 @@ message DebugOptions {
// If true, a set of expensive LLVM optimization passes will not be run.
bool xla_llvm_disable_expensive_passes = 73;

reserved 80; // Was hlo_reduce_precision_options
// Options for inserting reduce-precision operations for numerical
// experimentation. This is a repeated field, as we may want to have
// multiple passes with different parameters.
repeated HloReducePrecisionOptions hlo_reduce_precision_options = 80;

// This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the
// computation will run n! times with all permutations of layouts for the
Expand All @@ -107,9 +148,8 @@ message DebugOptions {
// If true, the GPU backend is free to use cudnn for HLO batch normalization
// ops.
bool xla_gpu_use_cudnn_batchnorm = 94;
bool xla_gpu_use_cudnn_softmax = 95;

// Generate calls to OneDNN in the CPU backend.
// Generate calls to MKL-DNN in the CPU backend.
bool xla_cpu_use_mkl_dnn = 97;

// Maximum kernel unroll factor for the GPU backend.
Expand Down Expand Up @@ -159,7 +199,7 @@ message DebugOptions {
bool xla_gpu_crash_on_verification_failures = 101;

// Disable GEMM and Convolution auto-tuning.
int32 xla_gpu_autotune_level = 123;
bool xla_gpu_disable_autotune = 123;

// Force the host platform to pretend that there are these many host
// "devices". All these devices are backed by the same threadpool. Defaults
Expand All @@ -171,7 +211,7 @@ message DebugOptions {
int32 xla_force_host_platform_device_count = 102;

// If set to true XLA:GPU invokes `ptxas` with -O0 (default is -O3).
bool xla_gpu_disable_gpuasm_optimizations = 103;
bool xla_gpu_disable_ptxas_optimizations = 103;

// Enable fast math with eigen in the HLO evaluator.
bool xla_hlo_evaluator_use_fast_path = 106;
Expand Down Expand Up @@ -241,12 +281,6 @@ message DebugOptions {
// directory.
bool xla_dump_hlo_snapshots = 118;

// Include a timestamp in the dumped filenames.
bool xla_dump_include_timestamp = 131;

// Max number of hlo module dumps in a directory. Set to < 0 for unbounded.
int32 xla_dump_max_hlo_modules = 132;

//
// END flags controlling dumping HLO modules.
//
Expand All @@ -259,28 +293,14 @@ message DebugOptions {
// Blacklist for cuDNN convolutions.
string xla_gpu_algorithm_blacklist_path = 128;

// Guarantee run-to-run determinism from reductions on XLA:GPU.
bool xla_gpu_deterministic_reductions = 130;
// Next id: 134

// Extra parameters to pass the GPU assembler.
string xla_gpu_asm_extra_flags = 141;

// Per-heap size constraint. New heaps will be created if per-heap max size is
// reached.
int32 xla_multiheap_size_constraint_per_heap = 142;

// Next id: 142

// Persistent compilation cache directory
string xla_gpu_persistent_cache_dir = 151;
// Next id: 130

// Extra options to pass to the compilation backend (e.g. LLVM); specific
// interpretation of these values is left to the backend.
map<string, string> xla_backend_extra_options = 500;

reserved 5, 117, 133; // were xla_hlo_dump_as_graphdef, xla_dump_to, and
// xla_gpu_use_horizontal_fusion
reserved 117; // was xla_dump_to
reserved 5; // Was xla_hlo_dump_as_graphdef
}

// These settings control how XLA compiles and/or runs code. Not all settings
Expand Down Expand Up @@ -315,14 +335,6 @@ message ExecutionOptions {
// This optional field specifies the device assignment if known at compile
// time.
DeviceAssignmentProto device_assignment = 7;

// Alias input and output buffers for parameters that are passed-through XLA
// modules without being changed.
bool alias_passthrough_params = 8;

// Number of partitions of the computation to run (model parallelism).
// If zero, uses the default number of partitions for the XLA service.
int32 num_partitions = 9;
}

message GetDeviceHandlesRequest {
Expand Down
2 changes: 0 additions & 2 deletions pro/log/run-configure2.log
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,4 @@ Configuration finished
>> STEP@3

>> STEP@4
Sat Nov 12 12:31:00 UTC 2022 : === Preparing sources in dir: /tmp/tmp.gUn6e6nDCC
/pro/DeepRec /pro/DeepRec
> Run

0 comments on commit 01a57a2

Please sign in to comment.