From 71742243aeab0e51238eaa6df9487a0c37e97087 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Fri, 29 Sep 2023 15:45:56 +0200 Subject: [PATCH] [Clang] Add __datasizeof --- clang/docs/LanguageExtensions.rst | 12 ++++++ clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/Basic/Features.def | 1 + clang/include/clang/Basic/TokenKinds.def | 1 + clang/lib/AST/ExprConstant.cpp | 20 +++++++-- clang/lib/AST/ItaniumMangle.cpp | 9 ++++ clang/lib/CodeGen/CGExprScalar.cpp | 5 ++- clang/lib/Parse/ParseExpr.cpp | 42 ++++++++++++++----- clang/lib/Sema/SemaExpr.cpp | 5 ++- clang/test/CodeGenCXX/datasizeof.cpp | 19 +++++++++ clang/test/SemaCXX/datasizeof.cpp | 53 ++++++++++++++++++++++++ 11 files changed, 150 insertions(+), 19 deletions(-) create mode 100644 clang/test/CodeGenCXX/datasizeof.cpp create mode 100644 clang/test/SemaCXX/datasizeof.cpp diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 30e288f986782f..294210c6ac140a 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -424,6 +424,18 @@ Builtin Macros "UTF-16" or "UTF-32" (but may change in the future if the ``-fwide-exec-charset="Encoding-Name"`` option is implemented.) +Implementation-defined keywords +=============================== + +__datasizeof +------------ + +``__datasizeof`` behaves like ``sizeof``, except that it returns the size of the +type ignoring tail padding. + +.. + FIXME: This should list all the keyword extensions + .. _langext-vectors: Vectors and Extended Vectors diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8bac599f88503a..29d9f16a6f4b9f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -217,6 +217,8 @@ Non-comprehensive list of changes in this release (e.g., ``uint16x8_t``), this returns the constant number of elements at compile-time. For scalable vectors, e.g., SVE or RISC-V V, the number of elements is not known at compile-time and is determined at runtime. 
+* The ``__datasizeof`` keyword has been added. It is similar to ``sizeof`` + except that it returns the size of a type ignoring tail padding. New Compiler Flags ------------------ diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def index cf626d0120cc7c..4a4503ac2ec375 100644 --- a/clang/include/clang/Basic/Features.def +++ b/clang/include/clang/Basic/Features.def @@ -277,6 +277,7 @@ EXTENSION(gnu_asm_goto_with_outputs_full, LangOpts.GNUAsm) EXTENSION(matrix_types, LangOpts.MatrixTypes) EXTENSION(matrix_types_scalar_division, true) EXTENSION(cxx_attributes_on_using_declarations, LangOpts.CPlusPlus11) +EXTENSION(datasizeof, LangOpts.CPlusPlus) FEATURE(builtin_headers_in_system_modules, LangOpts.BuiltinHeadersInSystemModules) FEATURE(cxx_abi_relative_vtable, LangOpts.CPlusPlus && LangOpts.RelativeCXXABIVTables) diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 3ce317d318f9bb..6cb4b3f250c403 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -310,6 +310,7 @@ KEYWORD(return , KEYALL) KEYWORD(short , KEYALL) KEYWORD(signed , KEYALL) UNARY_EXPR_OR_TYPE_TRAIT(sizeof, SizeOf, KEYALL) +UNARY_EXPR_OR_TYPE_TRAIT(__datasizeof, DataSizeOf, KEYCXX) KEYWORD(static , KEYALL) KEYWORD(struct , KEYALL) KEYWORD(switch , KEYALL) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e16fec6109e744..4aa8045bc93be7 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3184,9 +3184,14 @@ static bool HandleLValueIndirectMember(EvalInfo &Info, const Expr *E, return true; } +enum class SizeOfType { + SizeOf, + DataSizeOf, +}; + /// Get the size of the given type in char units. 
-static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc, - QualType Type, CharUnits &Size) { +static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc, QualType Type, + CharUnits &Size, SizeOfType SOT = SizeOfType::SizeOf) { // sizeof(void), __alignof__(void), sizeof(function) = 1 as a gcc // extension. if (Type->isVoidType() || Type->isFunctionType()) { @@ -3206,7 +3211,10 @@ static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc, return false; } - Size = Info.Ctx.getTypeSizeInChars(Type); + if (SOT == SizeOfType::SizeOf) + Size = Info.Ctx.getTypeSizeInChars(Type); + else + Size = Info.Ctx.getTypeInfoDataSizeInChars(Type).Width; return true; } @@ -13689,6 +13697,7 @@ bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr( return Success(1, E); } + case UETT_DataSizeOf: case UETT_SizeOf: { QualType SrcTy = E->getTypeOfArgument(); // C++ [expr.sizeof]p2: "When applied to a reference or a reference type, @@ -13697,8 +13706,11 @@ bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr( SrcTy = Ref->getPointeeType(); CharUnits Sizeof; - if (!HandleSizeof(Info, E->getExprLoc(), SrcTy, Sizeof)) + if (!HandleSizeof(Info, E->getExprLoc(), SrcTy, Sizeof, + E->getKind() == UETT_DataSizeOf ? 
SizeOfType::DataSizeOf + : SizeOfType::SizeOf)) { return false; + } return Success(Sizeof, E); } case UETT_OpenMPRequiredSimdAlign: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 5ac8c2e447cdb5..6c5217f0da11e6 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -28,6 +28,7 @@ #include "clang/AST/Mangle.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/ABI.h" +#include "clang/Basic/DiagnosticAST.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" @@ -5068,6 +5069,14 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, Out << 'a'; MangleAlignofSizeofArg(); break; + case UETT_DataSizeOf: { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = + Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot yet mangle __datasizeof expression"); + Diags.Report(DiagID); + return; + } case UETT_VecStep: { DiagnosticsEngine &Diags = Context.getDiags(); unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 1a7a3f97bb779a..05a3c80fbdd03a 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -3053,9 +3053,10 @@ Value * ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( const UnaryExprOrTypeTraitExpr *E) { QualType TypeToSize = E->getTypeOfArgument(); - if (E->getKind() == UETT_SizeOf) { + if (auto Kind = E->getKind(); + Kind == UETT_SizeOf || Kind == UETT_DataSizeOf) { if (const VariableArrayType *VAT = - CGF.getContext().getAsVariableArrayType(TypeToSize)) { + CGF.getContext().getAsVariableArrayType(TypeToSize)) { if (E->isArgumentType()) { // sizeof(type) - make sure to emit the VLA size. 
CGF.EmitVariablyModifiedType(TypeToSize); diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 53fba3b2f59242..9e05394e8d07dd 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1460,6 +1460,9 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, // unary-expression: '__alignof' '(' type-name ')' case tok::kw_sizeof: // unary-expression: 'sizeof' unary-expression // unary-expression: 'sizeof' '(' type-name ')' + // unary-expression: '__datasizeof' unary-expression + // unary-expression: '__datasizeof' '(' type-name ')' + case tok::kw___datasizeof: case tok::kw_vec_step: // unary-expression: OpenCL 'vec_step' expression // unary-expression: '__builtin_omp_required_simd_align' '(' type-name ')' case tok::kw___builtin_omp_required_simd_align: @@ -2307,6 +2310,8 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { /// unary-expression: [C99 6.5.3] /// 'sizeof' unary-expression /// 'sizeof' '(' type-name ')' +/// [Clang] '__datasizeof' unary-expression +/// [Clang] '__datasizeof' '(' type-name ')' /// [GNU] '__alignof' unary-expression /// [GNU] '__alignof' '(' type-name ')' /// [C11] '_Alignof' '(' type-name ')' @@ -2335,8 +2340,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok, SourceRange &CastRange) { assert(OpTok.isOneOf(tok::kw_typeof, tok::kw_typeof_unqual, tok::kw_sizeof, - tok::kw___alignof, tok::kw_alignof, tok::kw__Alignof, - tok::kw_vec_step, + tok::kw___datasizeof, tok::kw___alignof, tok::kw_alignof, + tok::kw__Alignof, tok::kw_vec_step, tok::kw___builtin_omp_required_simd_align, tok::kw___builtin_vectorelements) && "Not a typeof/sizeof/alignof/vec_step expression!"); @@ -2347,8 +2352,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok, if (Tok.isNot(tok::l_paren)) { // If construct allows a form without parenthesis, user may forget to put // pathenthesis around type name. 
- if (OpTok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof, - tok::kw__Alignof)) { + if (OpTok.isOneOf(tok::kw_sizeof, tok::kw___datasizeof, tok::kw___alignof, + tok::kw_alignof, tok::kw__Alignof)) { if (isTypeIdUnambiguously()) { DeclSpec DS(AttrFactory); ParseSpecifierQualifierList(DS); @@ -2451,14 +2456,16 @@ ExprResult Parser::ParseSYCLUniqueStableNameExpression() { /// 'sizeof' unary-expression /// 'sizeof' '(' type-name ')' /// [C++11] 'sizeof' '...' '(' identifier ')' +/// [Clang] '__datasizeof' unary-expression +/// [Clang] '__datasizeof' '(' type-name ')' /// [GNU] '__alignof' unary-expression /// [GNU] '__alignof' '(' type-name ')' /// [C11] '_Alignof' '(' type-name ')' /// [C++11] 'alignof' '(' type-id ')' /// \endverbatim ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { - assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof, - tok::kw__Alignof, tok::kw_vec_step, + assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___datasizeof, tok::kw___alignof, + tok::kw_alignof, tok::kw__Alignof, tok::kw_vec_step, tok::kw___builtin_omp_required_simd_align, tok::kw___builtin_vectorelements) && "Not a sizeof/alignof/vec_step expression!"); @@ -2531,16 +2538,29 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { CastRange); UnaryExprOrTypeTrait ExprKind = UETT_SizeOf; - if (OpTok.isOneOf(tok::kw_alignof, tok::kw__Alignof)) + switch (OpTok.getKind()) { + case tok::kw_alignof: + case tok::kw__Alignof: ExprKind = UETT_AlignOf; - else if (OpTok.is(tok::kw___alignof)) + break; + case tok::kw___alignof: ExprKind = UETT_PreferredAlignOf; - else if (OpTok.is(tok::kw_vec_step)) + break; + case tok::kw_vec_step: ExprKind = UETT_VecStep; - else if (OpTok.is(tok::kw___builtin_omp_required_simd_align)) + break; + case tok::kw___builtin_omp_required_simd_align: ExprKind = UETT_OpenMPRequiredSimdAlign; - else if (OpTok.is(tok::kw___builtin_vectorelements)) + break; + case tok::kw___datasizeof: + ExprKind = UETT_DataSizeOf; + break; + case 
tok::kw___builtin_vectorelements: ExprKind = UETT_VectorElements; + break; + default: + break; + } if (isCastExpr) return Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(), diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 432e4285e8a01d..072c800ad4c555 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -4444,8 +4444,9 @@ bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E, assert(!ExprTy->isReferenceType()); bool IsUnevaluatedOperand = - (ExprKind == UETT_SizeOf || ExprKind == UETT_AlignOf || - ExprKind == UETT_PreferredAlignOf || ExprKind == UETT_VecStep); + (ExprKind == UETT_SizeOf || ExprKind == UETT_DataSizeOf || + ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf || + ExprKind == UETT_VecStep); if (IsUnevaluatedOperand) { ExprResult Result = CheckUnevaluatedOperand(E); if (Result.isInvalid()) diff --git a/clang/test/CodeGenCXX/datasizeof.cpp b/clang/test/CodeGenCXX/datasizeof.cpp new file mode 100644 index 00000000000000..5a8f4fc79bdf05 --- /dev/null +++ b/clang/test/CodeGenCXX/datasizeof.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: define dso_local noundef i32 @_Z4testi( +// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[I_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 4, [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// CHECK-NEXT: ret i32 [[TMP3]] +// +int test(int i) { + (void)__datasizeof(int[i++]); + return i; +} diff --git a/clang/test/SemaCXX/datasizeof.cpp b/clang/test/SemaCXX/datasizeof.cpp new file mode 
100644 index 00000000000000..f96660d2028d07 --- /dev/null +++ b/clang/test/SemaCXX/datasizeof.cpp @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -fsyntax-only -triple x86_64-linux-gnu -verify %s + +#if !__has_extension(datasizeof) +# error "Expected datasizeof extension" +#endif + +struct HasPadding { + int i; + char c; +}; + +struct HasUsablePadding { + int i; + char c; + + HasUsablePadding() {} +}; + +struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}} + +static_assert(__datasizeof(int) == 4); +static_assert(__datasizeof(HasPadding) == 8); +static_assert(__datasizeof(HasUsablePadding) == 5); +static_assert(__datasizeof(void)); // expected-error {{invalid application of '__datasizeof' to an incomplete type 'void'}} +static_assert(__datasizeof(Incomplete)); // expected-error {{invalid application of '__datasizeof' to an incomplete type 'Incomplete'}} + +static_assert([] { + int* p = nullptr; + HasPadding* p2 = nullptr; + HasUsablePadding* p3 = nullptr; + static_assert(__datasizeof(*p) == 4); + static_assert(__datasizeof *p == 4); + static_assert(__datasizeof(*p2) == 8); + static_assert(__datasizeof(*p3) == 5); + + return true; +}()); + +template <class Ty> +constexpr int data_size_of() { + return __datasizeof(Ty); +} +static_assert(data_size_of<int>() == __datasizeof(int)); +static_assert(data_size_of<HasPadding>() == __datasizeof(HasPadding)); +static_assert(data_size_of<HasUsablePadding>() == __datasizeof(HasUsablePadding)); + +struct S { + int i = __datasizeof(S); + float f; + char c; +}; + +static_assert(S{}.i == 9);