Skip to content

Commit

Permalink
#79 Code Test +5
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanaXu committed Nov 12, 2022
1 parent bc3188b commit 01a57a2
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 39 deletions.
39 changes: 38 additions & 1 deletion pro/DeepRec/tensorflow/compiler/xla/xla.bzl
Original file line number Diff line number Diff line change
@@ -1,4 +1,37 @@
"""Wrapper around proto libraries used inside the XLA codebase."""
"""Wrapper around cc_proto_library used inside the XLA codebase."""

load(
"//tensorflow/core/platform:default/build_config.bzl",
"cc_proto_library",
)
load(
"//tensorflow/core/platform:default/build_config_root.bzl",
"if_static",
)
load(
"//tensorflow/core/platform:default/cuda_build_defs.bzl",
"if_cuda_is_configured",
)

# xla_proto_library() is a convenience wrapper around cc_proto_library.
def xla_proto_library(name, srcs = [], deps = [], visibility = None, testonly = 0, **kwargs):
    """Convenience wrapper around cc_proto_library for XLA proto targets.

    Forwards to cc_proto_library, adding the protobuf well-known types to
    `deps`, selecting the static or headers-only protobuf runtime, and
    enabling the gRPC namespace whenever the gRPC plugin is requested.
    """
    forwarded = dict(kwargs)

    # When the gRPC plugin is on, also generate code under the grpc namespace.
    if forwarded.get("use_grpc_plugin"):
        forwarded["use_grpc_namespace"] = True

    # The well-known-proto dep is the only way for xla_proto_library users
    # to reach google.protobuf.{Any,Duration,...}.
    all_deps = deps + ["@com_google_protobuf//:cc_wkt_protos"]

    cc_proto_library(
        name = name,
        srcs = srcs,
        deps = all_deps,
        cc_libs = if_static(
            ["@com_google_protobuf//:protobuf"],
            otherwise = ["@com_google_protobuf//:protobuf_headers"],
        ),
        protoc = "@com_google_protobuf//:protoc",
        testonly = testonly,
        visibility = visibility,
        **forwarded
    )

def xla_py_proto_library(**kwargs):
# Note: we don't currently define a proto library target for Python in OSS.
Expand All @@ -12,5 +45,9 @@ def xla_py_grpc_library(**kwargs):

ORC_JIT_MEMORY_MAPPER_TARGETS = []

# We link the GPU plugin into the XLA Python extension if CUDA is enabled.
def xla_python_default_plugins():
    """Returns the GPU plugin dep for the XLA Python extension when CUDA is configured, else nothing."""
    gpu_plugin = ["//tensorflow/compiler/xla/service:gpu_plugin"]
    return if_cuda_is_configured(gpu_plugin)

def xla_py_test_deps():
    """Extra dependencies for XLA Python tests; none are needed in the OSS build."""
    deps = []
    return deps
84 changes: 48 additions & 36 deletions pro/DeepRec/tensorflow/compiler/xla/xla.proto
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,44 @@ package xla;
import "tensorflow/compiler/xla/service/hlo.proto";
import "tensorflow/compiler/xla/xla_data.proto";

// Options for the HLO insert-reduce-precision-operations pass.
message HloReducePrecisionOptions {
  // Where and when the reduce-precision operations will be added.
  enum Location {
    // Add reduce-precision operations to the inputs of selected instructions.
    // This is done before any optimization occurs.
    OP_INPUTS = 0;
    // Add reduce-precision operations to the outputs of selected instructions.
    // This is done before any optimization occurs.
    OP_OUTPUTS = 1;
    // After operation-fusion occurs, add reduce-precision operations to the
    // outputs of any selected instructions that have not been fused into
    // fusion instructions.
    UNFUSED_OP_OUTPUTS = 2;
    // After operation-fusion occurs, add reduce-precision operations to the
    // inputs of any fusion instructions that contain operations matching the
    // selection criteria.
    FUSION_INPUTS_BY_CONTENT = 3;
    // After operation-fusion occurs, add reduce-precision operations to the
    // outputs of any fusion instructions that contain operations matching the
    // selection criteria.
    FUSION_OUTPUTS_BY_CONTENT = 4;
  }
  // Which of the insertion points above this pass instance targets.
  Location location = 1;

  // Exponent and mantissa bit counts for the reduced precision.
  uint32 exponent_bits = 2;
  uint32 mantissa_bits = 3;

  // Operations matching these opcodes should be suffixed with reduce-precision
  // operations.
  repeated uint32 opcodes_to_suffix = 4;

  // Operations with names containing these substrings should be suffixed with
  // reduce-precision operations.
  repeated string opname_substrings_to_suffix = 5;
}

// Debugging options for XLA. These options may change at any time - there are
// no guarantees about backward or forward compatibility for these fields.
message DebugOptions {
Expand Down Expand Up @@ -84,7 +122,10 @@ message DebugOptions {
// If true, a set of expensive LLVM optimization passes will not be run.
bool xla_llvm_disable_expensive_passes = 73;

reserved 80; // Was hlo_reduce_precision_options
// Options for inserting reduce-precision operations for numerical
// experimentation. This is a repeated field, as we may want to have
// multiple passes with different parameters.
repeated HloReducePrecisionOptions hlo_reduce_precision_options = 80;

// This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the
// computation will run n! times with all permutations of layouts for the
Expand All @@ -107,9 +148,8 @@ message DebugOptions {
// If true, the GPU backend is free to use cudnn for HLO batch normalization
// ops.
bool xla_gpu_use_cudnn_batchnorm = 94;
bool xla_gpu_use_cudnn_softmax = 95;

// Generate calls to OneDNN in the CPU backend.
// Generate calls to MKL-DNN in the CPU backend.
bool xla_cpu_use_mkl_dnn = 97;

// Maximum kernel unroll factor for the GPU backend.
Expand Down Expand Up @@ -159,7 +199,7 @@ message DebugOptions {
bool xla_gpu_crash_on_verification_failures = 101;

// Disable GEMM and Convolution auto-tuning.
int32 xla_gpu_autotune_level = 123;
bool xla_gpu_disable_autotune = 123;

// Force the host platform to pretend that there are these many host
// "devices". All these devices are backed by the same threadpool. Defaults
Expand All @@ -171,7 +211,7 @@ message DebugOptions {
int32 xla_force_host_platform_device_count = 102;

// If set to true XLA:GPU invokes `ptxas` with -O0 (default is -O3).
bool xla_gpu_disable_gpuasm_optimizations = 103;
bool xla_gpu_disable_ptxas_optimizations = 103;

// Enable fast math with eigen in the HLO evaluator.
bool xla_hlo_evaluator_use_fast_path = 106;
Expand Down Expand Up @@ -241,12 +281,6 @@ message DebugOptions {
// directory.
bool xla_dump_hlo_snapshots = 118;

// Include a timestamp in the dumped filenames.
bool xla_dump_include_timestamp = 131;

// Max number of hlo module dumps in a directory. Set to < 0 for unbounded.
int32 xla_dump_max_hlo_modules = 132;

//
// END flags controlling dumping HLO modules.
//
Expand All @@ -259,28 +293,14 @@ message DebugOptions {
// Blacklist for cuDNN convolutions.
string xla_gpu_algorithm_blacklist_path = 128;

// Guarantee run-to-run determinism from reductions on XLA:GPU.
bool xla_gpu_deterministic_reductions = 130;
// Next id: 134

// Extra parameters to pass the GPU assembler.
string xla_gpu_asm_extra_flags = 141;

// Per-heap size constraint. New heaps will be created if per-heap max size is
// reached.
int32 xla_multiheap_size_constraint_per_heap = 142;

// Next id: 142

// Persistent compilation cache directory
string xla_gpu_persistent_cache_dir = 151;
// Next id: 130

// Extra options to pass to the compilation backend (e.g. LLVM); specific
// interpretation of these values is left to the backend.
map<string, string> xla_backend_extra_options = 500;

reserved 5, 117, 133; // were xla_hlo_dump_as_graphdef, xla_dump_to, and
// xla_gpu_use_horizontal_fusion
reserved 117; // was xla_dump_to
reserved 5; // Was xla_hlo_dump_as_graphdef
}

// These settings control how XLA compiles and/or runs code. Not all settings
Expand Down Expand Up @@ -315,14 +335,6 @@ message ExecutionOptions {
// This optional field specifies the device assignment if known at compile
// time.
DeviceAssignmentProto device_assignment = 7;

// Alias input and output buffers for parameters that are passed-through XLA
// modules without being changed.
bool alias_passthrough_params = 8;

// Number of partitions of the computation to run (model parallelism).
// If zero, uses the default number of partitions for the XLA service.
int32 num_partitions = 9;
}

message GetDeviceHandlesRequest {
Expand Down
2 changes: 0 additions & 2 deletions pro/log/run-configure2.log
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,4 @@ Configuration finished
>> STEP@3

>> STEP@4
Sat Nov 12 12:31:00 UTC 2022 : === Preparing sources in dir: /tmp/tmp.gUn6e6nDCC
/pro/DeepRec /pro/DeepRec
> Run

0 comments on commit 01a57a2

Please sign in to comment.