From a5c948a19044ff79e02788bff7a50c2385b489f0 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Fri, 10 May 2024 13:15:07 -0700 Subject: [PATCH 1/2] Merge pull request #73491 from tbkka/tbkka-mpe-sparebits Calculate spare bits for multi-payload enums from first principles This adds a getSpareBits method to all the TypeInfo classes that returns a suitable bitmask indicating the spare bits available in values of this type. This gives us a way to recursively explore the type tree and build up a full spare bit mask for an arbitrary type. We mostly have enough information to do this calculation entirely from first principles, without requiring additional reflection information. So once this is stable, we should remove my earlier incomplete effort to publish spare bit mask info in the reflection data, as that adds unnecessary metadata to every binary. TODO: Resilience forces some enums to not use spare bits even though they otherwise would be able to. We should have the compiler add a single bit to the reflectio data indicating whether or not spare bits were used and then rely on this code to actually compute the spare bits. This doubtless still has plenty of holes, but seems sufficient to handle a few basic enum types, including the stdlib DecodingError which was used as an example to work out some key issues. Resolves rdar://126563813 --- include/swift/RemoteInspection/BitMask.h | 339 ++++++++++ include/swift/RemoteInspection/TypeLowering.h | 13 + .../public/RemoteInspection/TypeLowering.cpp | 594 +++++------------- .../Reflection/reflect_Enum_value.swift | 24 + .../Reflection/reflect_Enum_values5.swift | 63 ++ .../Reflection/reflect_Enum_values6.swift | 82 +++ 6 files changed, 693 insertions(+), 422 deletions(-) create mode 100644 include/swift/RemoteInspection/BitMask.h create mode 100644 validation-test/Reflection/reflect_Enum_values5.swift create mode 100644 validation-test/Reflection/reflect_Enum_values6.swift diff --git a/include/swift/RemoteInspection/BitMask.h b/include/swift/RemoteInspection/BitMask.h new file mode 100644 index 0000000000000..f64f2126996a1 --- /dev/null +++ b/include/swift/RemoteInspection/BitMask.h @@ -0,0 +1,339 @@ +//===--- Bitmask.h - Swift Bitmask type for Reflection ----*- C++ -*-===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// +// +// Used by TypeLowering logic to compute masks for in-memory representations +// +//===----------------------------------------------------------------------===// + +#ifndef SWIFT_REFLECTION_BITMASK_H +#define SWIFT_REFLECTION_BITMASK_H + +#include "swift/Remote/MemoryReader.h" +#include + +namespace swift { +namespace reflection { + +// A variable-length bitmap used to track "spare bits" for general multi-payload +// enums. Note: These are not arbitrary-sized! They are always a multiple +// of 8 bits in size, and always aligned on an 8-bit boundary. +class BitMask { + static constexpr unsigned maxSize = 128 * 1024 * 1024; // 128MB + + unsigned size; // Size of mask _in bytes_ + uint8_t *mask; +public: + ~BitMask() { + free(mask); + } +private: + // Construct a bitmask of the appropriate number of bytes + // initialized to all bits set + BitMask(unsigned sizeInBytes = 0): size(sizeInBytes) { + assert(size < maxSize && "Trying to build a too-large bitmask"); + if (size > maxSize || size == 0) { + size = 0; + mask = nullptr; + return; + } + + mask = (uint8_t *)malloc(size); + + if (!mask) { + // Malloc might fail if size is large due to some bad data. Assert in + // asserts builds, and fail gracefully in non-asserts builds by + // constructing an empty BitMask. + assert(false && "Failed to allocate BitMask"); + size = 0; + return; + } + + memset(mask, 0xff, size); + } + +public: + static BitMask zeroMask(unsigned sizeInBytes) { + auto mask = BitMask(sizeInBytes); + mask.makeZero(); + return mask; + } + + static BitMask oneMask(unsigned sizeInBytes) { + auto mask = BitMask(sizeInBytes); + return mask; + } + + BitMask(unsigned sizeInBytes, uint64_t sourceMask): size(sizeInBytes) { + mask = (uint8_t *)calloc(1, sizeInBytes); + memcpy(mask, &sourceMask, sizeInBytes); + } + + // Construct a bitmask of the appropriate number of bytes + // initialized with bits from the specified buffer + BitMask(unsigned sizeInBytes, const uint8_t *initialValue, + unsigned initialValueBytes, unsigned offset) + : size(sizeInBytes) { + // Gracefully fail by constructing an empty mask if we exceed the size + // limit. + if (size > maxSize) { + size = 0; + mask = nullptr; + return; + } + + // Bad data could cause the initial value location to be off the end of our + // size. If initialValueBytes + offset is beyond sizeInBytes (or overflows), + // assert in asserts builds, and fail gracefully in non-asserts builds by + // constructing an empty BitMask. + bool overflowed = false; + unsigned initialValueEnd = + llvm::SaturatingAdd(initialValueBytes, offset, &overflowed); + if (overflowed) { + assert(false && "initialValueBytes + offset overflowed"); + size = 0; + mask = nullptr; + return; + } + assert(initialValueEnd <= sizeInBytes); + if (initialValueEnd > size) { + assert(false && "initialValueBytes + offset is greater than size"); + size = 0; + mask = nullptr; + return; + } + + mask = (uint8_t *)calloc(1, size); + + if (!mask) { + // Malloc might fail if size is large due to some bad data. Assert in + // asserts builds, and fail gracefully in non-asserts builds by + // constructing an empty BitMask. + assert(false && "Failed to allocate BitMask"); + size = 0; + return; + } + + memcpy(mask + offset, initialValue, initialValueBytes); + } + // Move constructor moves ownership and zeros the src + BitMask(BitMask&& src) noexcept: size(src.size), mask(std::move(src.mask)) { + src.size = 0; + src.mask = nullptr; + } + // Copy constructor makes a copy of the mask storage + BitMask(const BitMask& src) noexcept: size(src.size), mask(nullptr) { + mask = (uint8_t *)malloc(size); + memcpy(mask, src.mask, size); + } + + std::string str() const { + std::ostringstream buff; + buff << size << ":0x"; + for (unsigned i = 0; i < size; i++) { + buff << std::hex << ((mask[i] >> 4) & 0x0f) << (mask[i] & 0x0f); + } + return buff.str(); + } + + bool operator==(const BitMask& rhs) const { + // The two masks may be of different sizes. + // The common prefix must be identical. + size_t common = std::min(size, rhs.size); + if (memcmp(mask, rhs.mask, common) != 0) + return false; + // The remainder of the longer mask must be + // all zero bits. + unsigned mustBeZeroSize = std::max(size, rhs.size) - common; + uint8_t *mustBeZero; + if (size < rhs.size) { + mustBeZero = rhs.mask + size; + } else if (size > rhs.size) { + mustBeZero = mask + rhs.size; + } + for (unsigned i = 0; i < mustBeZeroSize; ++i) { + if (mustBeZero[i] != 0) { + return false; + } + } + return true; + } + + bool operator!=(const BitMask& rhs) const { + return !(*this == rhs); + } + + bool isNonZero() const { return !isZero(); } + + bool isZero() const { + for (unsigned i = 0; i < size; ++i) { + if (mask[i] != 0) { + return false; + } + } + return true; + } + + void makeZero() { + memset(mask, 0, size * sizeof(mask[0])); + } + + void complement() { + for (unsigned i = 0; i < size; ++i) { + mask[i] = ~mask[i]; + } + } + + int countSetBits() const { + static const int counter[] = + {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; + int bits = 0; + for (unsigned i = 0; i < size; ++i) { + bits += counter[mask[i] >> 4] + counter[mask[i] & 15]; + } + return bits; + } + + int countZeroBits() const { + return (size * 8) - countSetBits(); + } + + // Treat the provided value as a mask, `and` it with + // the part of the mask at the provided byte offset. + // Bits outside the specified area are unchanged. + template + void andMask(IntegerType value, unsigned byteOffset) { + andMask((void *)&value, sizeof(value), byteOffset); + } + + // As above, but using the provided bitmask instead + // of an integer. + void andMask(BitMask mask, unsigned offset) { + andMask(mask.mask, mask.size, offset); + } + + // As above, but using the complement of the + // provided mask. + void andNotMask(BitMask mask, unsigned offset) { + if (offset < size) { + andNotMask(mask.mask, mask.size, offset); + } + } + + // Zero all bits except for the `n` most significant ones. + void keepOnlyMostSignificantBits(unsigned n) { + if (size < 1) { + return; + } +#if defined(__BIG_ENDIAN__) + assert(false && "Big endian not supported for readMaskedInteger"); +#else + unsigned count = 0; + unsigned i = size; + while (i > 0) { + i -= 1; + if (count < n) { + for (int b = 128; b > 0; b >>= 1) { + if (count >= n) { + mask[i] &= ~b; + } else if ((mask[i] & b) != 0) { + ++count; + } + } + } else { + mask[i] = 0; + } + } +#endif + } + + void keepOnlyLeastSignificantBytes(unsigned n) { + if (size > n) { + size = n; + } + } + + unsigned numBits() const { + return size * 8; + } + + unsigned numSetBits() const { + unsigned count = 0; + for (unsigned i = 0; i < size; ++i) { + if (mask[i] != 0) { + for (unsigned b = 1; b < 256; b <<= 1) { + if ((mask[i] & b) != 0) { + ++count; + } + } + } + } + return count; + } + + // Read a mask-sized area from the target and collect + // the masked bits into a single integer. + template + bool readMaskedInteger(remote::MemoryReader &reader, + remote::RemoteAddress address, + IntegerType *dest) const { + auto data = reader.readBytes(address, size); + if (!data) { + return false; + } +#if defined(__BIG_ENDIAN__) + assert(false && "Big endian not supported for readMaskedInteger"); +#else + IntegerType result = 0; + IntegerType resultBit = 1; // Start from least-significant bit + auto bytes = static_cast(data.get()); + for (unsigned i = 0; i < size; ++i) { + for (unsigned b = 1; b < 256; b <<= 1) { + if ((mask[i] & b) != 0) { + if ((bytes[i] & b) != 0) { + result |= resultBit; + } + resultBit <<= 1; + } + } + } + *dest = result; + return true; +#endif + } + +private: + void andMask(void *maskData, unsigned len, unsigned offset) { + if (offset < size) { + unsigned common = std::min(len, size - offset); + uint8_t *maskBytes = (uint8_t *)maskData; + for (unsigned i = 0; i < common; ++i) { + mask[i + offset] &= maskBytes[i]; + } + } + } + + void andNotMask(void *maskData, unsigned len, unsigned offset) { + assert(offset < size); + if (offset < size) { + unsigned common = std::min(len, size - offset); + uint8_t *maskBytes = (uint8_t *)maskData; + for (unsigned i = 0; i < common; ++i) { + mask[i + offset] &= ~maskBytes[i]; + } + } + } +}; + +} // namespace reflection +} // namespace swift + +#endif diff --git a/include/swift/RemoteInspection/TypeLowering.h b/include/swift/RemoteInspection/TypeLowering.h index ffe5b15bbdeea..0fdaa2e5dc6c1 100644 --- a/include/swift/RemoteInspection/TypeLowering.h +++ b/include/swift/RemoteInspection/TypeLowering.h @@ -23,6 +23,7 @@ #include "llvm/Support/Casting.h" #include "swift/Remote/MetadataReader.h" #include "swift/Remote/TypeInfoProvider.h" +#include "swift/RemoteInspection/BitMask.h" #include "swift/RemoteInspection/DescriptorFinder.h" #include @@ -34,6 +35,7 @@ using llvm::cast; using llvm::dyn_cast; using remote::RemoteRef; +class TypeConverter; class TypeRef; class TypeRefBuilder; class BuiltinTypeDescriptor; @@ -158,6 +160,11 @@ class TypeInfo { return false; } + // Calculate and return the spare bit mask for this type + virtual BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const { + return BitMask::zeroMask(getSize()); + } + virtual ~TypeInfo() { } }; @@ -195,6 +202,8 @@ class BuiltinTypeInfo : public TypeInfo { remote::RemoteAddress address, int *extraInhabitantIndex) const override; + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override; + static bool classof(const TypeInfo *TI) { return TI->getKind() == TypeInfoKind::Builtin; } @@ -222,6 +231,8 @@ class RecordTypeInfo : public TypeInfo { remote::RemoteAddress address, int *index) const override; + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override; + static bool classof(const TypeInfo *TI) { return TI->getKind() == TypeInfoKind::Record; } @@ -330,6 +341,8 @@ class ReferenceTypeInfo : public TypeInfo { return reader.readHeapObjectExtraInhabitantIndex(address, extraInhabitantIndex); } + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override; + static bool classof(const TypeInfo *TI) { return TI->getKind() == TypeInfoKind::Reference; } diff --git a/stdlib/public/RemoteInspection/TypeLowering.cpp b/stdlib/public/RemoteInspection/TypeLowering.cpp index 8a2047bdcb9a2..68cdd2a1e0397 100644 --- a/stdlib/public/RemoteInspection/TypeLowering.cpp +++ b/stdlib/public/RemoteInspection/TypeLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/MathExtras.h" #include "swift/ABI/Enum.h" #include "swift/ABI/MetadataValues.h" +#include "swift/RemoteInspection/BitMask.h" #include "swift/RemoteInspection/TypeLowering.h" #include "swift/RemoteInspection/TypeRef.h" #include "swift/RemoteInspection/TypeRefBuilder.h" @@ -229,6 +230,11 @@ void TypeInfo::dump(std::ostream &stream, unsigned Indent) const { stream << "\n"; } +BitMask ReferenceTypeInfo::getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const { + auto mpePointerSpareBits = TC.getBuilder().getMultiPayloadEnumPointerMask(); + return BitMask(getSize(), mpePointerSpareBits); +} + BuiltinTypeInfo::BuiltinTypeInfo(TypeRefBuilder &builder, BuiltinTypeDescriptorBase &descriptor) : TypeInfo(TypeInfoKind::Builtin, descriptor.Size, @@ -237,6 +243,21 @@ BuiltinTypeInfo::BuiltinTypeInfo(TypeRefBuilder &builder, descriptor.IsBitwiseTakable), Name(descriptor.getMangledTypeName()) {} +// Builtin.Int is mangled as 'Bi' N '_' +// Returns 0 if this isn't an Int +static unsigned isIntType(std::string name) { + llvm::StringRef nameRef(name); + if (nameRef.starts_with("Bi") && nameRef.ends_with("_")) { + llvm::StringRef naturalRef = nameRef.drop_front(2).drop_back(); + uint8_t natural; + if (naturalRef.getAsInteger(10, natural)) { + return 0; + } + return natural; + } + return 0; +} + bool BuiltinTypeInfo::readExtraInhabitantIndex( remote::MemoryReader &reader, remote::RemoteAddress address, int *extraInhabitantIndex) const { @@ -245,46 +266,39 @@ bool BuiltinTypeInfo::readExtraInhabitantIndex( return true; } // If it has extra inhabitants, it could be an integer type with extra - // inhabitants (a bool) or a pointer. - // Check if it's an integer first. The mangling of an integer type is - // type ::= 'Bi' NATURAL '_' - llvm::StringRef nameRef(Name); - if (nameRef.starts_with("Bi") && nameRef.endswith("_")) { - // Drop the front "Bi" and "_" end, check that what we're left with is a - // bool. - llvm::StringRef naturalRef = nameRef.drop_front(2).drop_back(); - uint8_t natural; - if (naturalRef.getAsInteger(10, natural)) - return false; - - assert(natural == 1 && - "Reading extra inhabitants of integer with more than 1 byte!"); - if (natural != 1) - return false; - - assert(getSize() == 1 && "Reading extra inhabitants of integer but size of " - "type info is different than 1!"); - if (getSize() != 1) - return false; + // inhabitants (such as a bool) or a pointer. + unsigned intSize = isIntType(Name); + if (intSize > 0) { + // This is an integer type + + // If it cannot have extra inhabitants, return early... + assert(intSize < getSize() * 8 + && "Standard-sized int cannot have extra inhabitants"); + if (intSize > 64 || getSize() > 8 || intSize >= getSize() * 8) { + *extraInhabitantIndex = -1; + return true; + } - assert(getNumExtraInhabitants() == 254 && - "Boolean type info should have 254 extra inhabitants!"); - if (getNumExtraInhabitants() != 254) - return false; + // Compute range of extra inhabitants + uint64_t maxValidValue = (((uint64_t)1) << intSize) - 1; + uint64_t maxAvailableValue = (((uint64_t)1) << (getSize() * 8)) - 1; + uint64_t computedExtraInhabitants = maxAvailableValue - maxValidValue; + if (computedExtraInhabitants > ValueWitnessFlags::MaxNumExtraInhabitants) { + computedExtraInhabitants = ValueWitnessFlags::MaxNumExtraInhabitants; + } + assert(getNumExtraInhabitants() == computedExtraInhabitants && + "Unexpected number of extra inhabitants in an odd-sized integer"); - uint8_t rawValue; - if (!reader.readInteger(address, &rawValue)) + // Example: maxValidValue is 1 for a 1-bit bool, so any larger value + // is an extra inhabitant. + uint64_t rawValue; + if (!reader.readInteger(address, getSize(), &rawValue)) return false; - - // The max valid value, for a bool valid values are 0 or 1, so this would - // be 1. - auto maxValidValue = 1; - // If the raw value falls outside the range of valid values, this is an - // extra inhabitant. - if (maxValidValue < rawValue) + if (maxValidValue < rawValue) { *extraInhabitantIndex = rawValue - maxValidValue - 1; - else + } else { *extraInhabitantIndex = -1; + } return true; } else if (Name == "yyXf") { // But there are two different conventions, one for function pointers: @@ -297,6 +311,26 @@ bool BuiltinTypeInfo::readExtraInhabitantIndex( } } +BitMask BuiltinTypeInfo::getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const { + unsigned intSize = isIntType(Name); + if (intSize > 0) { + // Odd-sized integers export spare bits + // In particular: bool fields are Int1 and export 7 spare bits + auto mask = BitMask::oneMask(getSize()); + mask.keepOnlyMostSignificantBits(getSize() * 8 - intSize); + return mask; + } else if ( + Name == "yyXf" // 'yyXf' = @thin () -> Void function + ) { + // Builtin types that expose pointer spare bits + auto mpePointerSpareBits = TC.getBuilder().getMultiPayloadEnumPointerMask(); + return BitMask(getSize(), mpePointerSpareBits); + } else { + // Everything else + return BitMask::zeroMask(getSize()); + } +} + bool RecordTypeInfo::readExtraInhabitantIndex(remote::MemoryReader &reader, remote::RemoteAddress address, int *extraInhabitantIndex) const { @@ -369,6 +403,45 @@ bool RecordTypeInfo::readExtraInhabitantIndex(remote::MemoryReader &reader, return false; } +BitMask RecordTypeInfo::getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const { + auto mask = BitMask::oneMask(getSize()); + switch (SubKind) { + case RecordKind::Invalid: + return mask; // FIXME: Should invalid have all spare bits? Or none? Does it matter? + case RecordKind::Tuple: + case RecordKind::Struct: + break; + case RecordKind::ThickFunction: + break; + case RecordKind::OpaqueExistential: { + // Existential storage isn't recorded as a field, + // so we handle it specially here... + int pointerSize = TC.targetPointerSize(); + BitMask submask = BitMask::zeroMask(pointerSize * 3); + mask.andMask(submask, 0); + hasAddrOnly = true; + break; + } + case RecordKind::ClassExistential: + break; + case RecordKind::ExistentialMetatype: + break; // Field 0 is metadata pointer, a Builtin of type 'yyXf' + case RecordKind::ErrorExistential: + break; + case RecordKind::ClassInstance: + break; + case RecordKind::ClosureContext: + break; + } + for (auto Field : Fields) { + if (Field.TR != 0) { + BitMask submask = Field.TI.getSpareBits(TC, hasAddrOnly); + mask.andMask(submask, Field.Offset); + } + } + return mask; +} + class UnsupportedEnumTypeInfo: public EnumTypeInfo { public: UnsupportedEnumTypeInfo(unsigned Size, unsigned Alignment, @@ -384,6 +457,10 @@ class UnsupportedEnumTypeInfo: public EnumTypeInfo { return false; } + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override { + return BitMask::zeroMask(getSize()); + } + bool projectEnumValue(remote::MemoryReader &reader, remote::RemoteAddress address, int *CaseIndex) const override { @@ -410,6 +487,10 @@ class EmptyEnumTypeInfo: public EnumTypeInfo { return false; } + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override { + return BitMask::zeroMask(getSize()); + } + bool projectEnumValue(remote::MemoryReader &reader, remote::RemoteAddress address, int *CaseIndex) const override { @@ -442,6 +523,10 @@ class TrivialEnumTypeInfo: public EnumTypeInfo { return true; } + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override { + return BitMask::zeroMask(getSize()); + } + bool projectEnumValue(remote::MemoryReader &reader, remote::RemoteAddress address, int *CaseIndex) const override { @@ -497,6 +582,10 @@ class NoPayloadEnumTypeInfo: public EnumTypeInfo { return true; } + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override { + return BitMask::zeroMask(getSize()); + } + bool projectEnumValue(remote::MemoryReader &reader, remote::RemoteAddress address, int *CaseIndex) const override { @@ -563,6 +652,10 @@ class SinglePayloadEnumTypeInfo: public EnumTypeInfo { } } + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override { + return BitMask::zeroMask(getSize()); + } + // Think of a single-payload enum as being encoded in "pages". // The discriminator (tag) tells us which page we're on: // * Page 0 is the payload page which can either store @@ -718,6 +811,16 @@ class TaggedMultiPayloadEnumTypeInfo: public EnumTypeInfo { return true; } + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override { + // Walk the child cases to set `hasAddrOnly` correctly. + for (auto Case : getCases()) { + if (Case.TR != 0) { + auto submask = Case.TI.getSpareBits(TC, hasAddrOnly); + } + } + return BitMask::zeroMask(getSize()); + } + bool projectEnumValue(remote::MemoryReader &reader, remote::RemoteAddress address, int *CaseIndex) const override { @@ -762,294 +865,6 @@ class TaggedMultiPayloadEnumTypeInfo: public EnumTypeInfo { } }; -// A variable-length bitmap used to track "spare bits" for general multi-payload -// enums. -class BitMask { - static constexpr unsigned maxSize = 128 * 1024 * 1024; // 128MB - - unsigned size; // Size of mask in bytes - uint8_t *mask; -public: - ~BitMask() { - free(mask); - } - // Construct a bitmask of the appropriate number of bytes - // initialized to all bits set - BitMask(unsigned sizeInBytes): size(sizeInBytes) { - // Gracefully fail by constructing an empty mask if we exceed the size - // limit. - if (size > maxSize) { - size = 0; - mask = nullptr; - return; - } - - mask = (uint8_t *)malloc(size); - - if (!mask) { - // Malloc might fail if size is large due to some bad data. Assert in - // asserts builds, and fail gracefully in non-asserts builds by - // constructing an empty BitMask. - assert(false && "Failed to allocate BitMask"); - size = 0; - return; - } - - memset(mask, 0xff, size); - } - // Construct a bitmask of the appropriate number of bytes - // initialized with bits from the specified buffer - BitMask(unsigned sizeInBytes, const uint8_t *initialValue, - unsigned initialValueBytes, unsigned offset) - : size(sizeInBytes) { - // Gracefully fail by constructing an empty mask if we exceed the size - // limit. - if (size > maxSize) { - size = 0; - mask = nullptr; - return; - } - - // Bad data could cause the initial value location to be off the end of our - // size. If initialValueBytes + offset is beyond sizeInBytes (or overflows), - // assert in asserts builds, and fail gracefully in non-asserts builds by - // constructing an empty BitMask. - bool overflowed = false; - unsigned initialValueEnd = - llvm::SaturatingAdd(initialValueBytes, offset, &overflowed); - if (overflowed) { - assert(false && "initialValueBytes + offset overflowed"); - size = 0; - mask = nullptr; - return; - } - assert(initialValueEnd <= sizeInBytes); - if (initialValueEnd > size) { - assert(false && "initialValueBytes + offset is greater than size"); - size = 0; - mask = nullptr; - return; - } - - mask = (uint8_t *)calloc(1, size); - - if (!mask) { - // Malloc might fail if size is large due to some bad data. Assert in - // asserts builds, and fail gracefully in non-asserts builds by - // constructing an empty BitMask. - assert(false && "Failed to allocate BitMask"); - size = 0; - return; - } - - memcpy(mask + offset, initialValue, initialValueBytes); - } - // Move constructor moves ownership and zeros the src - BitMask(BitMask&& src) noexcept: size(src.size), mask(std::move(src.mask)) { - src.size = 0; - src.mask = nullptr; - } - // Copy constructor makes a copy of the mask storage - BitMask(const BitMask& src) noexcept: size(src.size), mask(nullptr) { - mask = (uint8_t *)malloc(size); - memcpy(mask, src.mask, size); - } - - std::string str() const { - std::ostringstream buff; - buff << size << ":0x"; - for (unsigned i = 0; i < size; i++) { - buff << std::hex << ((mask[i] >> 4) & 0x0f) << (mask[i] & 0x0f); - } - return buff.str(); - } - - bool operator==(const BitMask& rhs) const { - // The two masks may be of different sizes. - // The common prefix must be identical. - size_t common = std::min(size, rhs.size); - if (memcmp(mask, rhs.mask, common) != 0) - return false; - // The remainder of the longer mask must be - // all zero bits. - unsigned mustBeZeroSize = std::max(size, rhs.size) - common; - uint8_t *mustBeZero; - if (size < rhs.size) { - mustBeZero = rhs.mask + size; - } else if (size > rhs.size) { - mustBeZero = mask + rhs.size; - } - for (unsigned i = 0; i < mustBeZeroSize; ++i) { - if (mustBeZero[i] != 0) { - return false; - } - } - return true; - } - - bool operator!=(const BitMask& rhs) const { - return !(*this == rhs); - } - - bool isNonZero() const { return !isZero(); } - - bool isZero() const { - for (unsigned i = 0; i < size; ++i) { - if (mask[i] != 0) { - return false; - } - } - return true; - } - - void makeZero() { - memset(mask, 0, size * sizeof(mask[0])); - } - - void complement() { - for (unsigned i = 0; i < size; ++i) { - mask[i] = ~mask[i]; - } - } - - int countSetBits() const { - static const int counter[] = - {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; - int bits = 0; - for (unsigned i = 0; i < size; ++i) { - bits += counter[mask[i] >> 4] + counter[mask[i] & 15]; - } - return bits; - } - - int countZeroBits() const { - static const int counter[] = - {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - int bits = 0; - for (unsigned i = 0; i < size; ++i) { - bits += counter[mask[i] >> 4] + counter[mask[i] & 15]; - } - return bits; - } - - // Treat the provided value as a mask, `and` it with - // the part of the mask at the provided byte offset. - // Bits outside the specified area are unchanged. - template - void andMask(IntegerType value, unsigned byteOffset) { - andMask((void *)&value, sizeof(value), byteOffset); - } - - // As above, but using the provided bitmask instead - // of an integer. - void andMask(BitMask mask, unsigned offset) { - andMask(mask.mask, mask.size, offset); - } - - // As above, but using the complement of the - // provided mask. - void andNotMask(BitMask mask, unsigned offset) { - if (offset < size) { - andNotMask(mask.mask, mask.size, offset); - } - } - - // Zero all bits except for the `n` most significant ones. - // XXX TODO: Big-endian support? - void keepOnlyMostSignificantBits(unsigned n) { - unsigned count = 0; - if (size < 1) { - return; - } - unsigned i = size; - while (i > 0) { - i -= 1; - if (count < n) { - for (int b = 128; b > 0; b >>= 1) { - if (count >= n) { - mask[i] &= ~b; - } else if ((mask[i] & b) != 0) { - ++count; - } - } - } else { - mask[i] = 0; - } - } - } - - unsigned numBits() const { - return size * 8; - } - - unsigned numSetBits() const { - unsigned count = 0; - for (unsigned i = 0; i < size; ++i) { - if (mask[i] != 0) { - for (unsigned b = 1; b < 256; b <<= 1) { - if ((mask[i] & b) != 0) { - ++count; - } - } - } - } - return count; - } - - // Read a mask-sized area from the target and collect - // the masked bits into a single integer. - template - bool readMaskedInteger(remote::MemoryReader &reader, - remote::RemoteAddress address, - IntegerType *dest) const { - auto data = reader.readBytes(address, size); - if (!data) { - return false; - } -#if defined(__BIG_ENDIAN__) - assert(false && "Big endian not supported for readMaskedInteger"); -#else - IntegerType result = 0; - IntegerType resultBit = 1; // Start from least-significant bit - auto bytes = static_cast(data.get()); - for (unsigned i = 0; i < size; ++i) { - for (unsigned b = 1; b < 256; b <<= 1) { - if ((mask[i] & b) != 0) { - if ((bytes[i] & b) != 0) { - result |= resultBit; - } - resultBit <<= 1; - } - } - } - *dest = result; - return true; -#endif - } - -private: - void andMask(void *maskData, unsigned len, unsigned offset) { - if (offset < size) { - unsigned common = std::min(len, size - offset); - uint8_t *maskBytes = (uint8_t *)maskData; - for (unsigned i = 0; i < common; ++i) { - mask[i + offset] &= maskBytes[i]; - } - } - } - - void andNotMask(void *maskData, unsigned len, unsigned offset) { - assert(offset < size); - if (offset < size) { - unsigned common = std::min(len, size - offset); - uint8_t *maskBytes = (uint8_t *)maskData; - for (unsigned i = 0; i < common; ++i) { - mask[i + offset] &= ~maskBytes[i]; - } - } - } -}; - // General multi-payload enum support for enums that do use spare // bits in the payload. class MultiPayloadEnumTypeInfo: public EnumTypeInfo { @@ -1102,8 +917,9 @@ class MultiPayloadEnumTypeInfo: public EnumTypeInfo { tag = payloadTagLow; tagBits = payloadTagLowBitCount; - // Read the other spare bits + // Read the other spare bits from the payload area auto otherSpareBitsMask = spareBitsMask; // copy + otherSpareBitsMask.keepOnlyLeastSignificantBytes(getPayloadSize()); otherSpareBitsMask.andNotMask(payloadTagLowBitsMask, 0); auto otherSpareBitsCount = otherSpareBitsMask.countSetBits(); if (otherSpareBitsCount > 0) { @@ -1154,6 +970,13 @@ class MultiPayloadEnumTypeInfo: public EnumTypeInfo { return true; } + BitMask getSpareBits(TypeConverter &TC, bool &hasAddrOnly) const override { + auto mask = spareBitsMask; + // Bits we've used for our tag can't be re-used by a containing enum... + mask.andNotMask(getMultiPayloadTagBitsMask(), 0); + return mask; + } + bool projectEnumValue(remote::MemoryReader &reader, remote::RemoteAddress address, int *CaseIndex) const override { @@ -1221,7 +1044,7 @@ class MultiPayloadEnumTypeInfo: public EnumTypeInfo { // The case value is stored in three pieces: // * A separate "discriminator" tag appended to the payload (if necessary) - // * A "payload tag" that uses (a subset of) the spare bits + // * A "payload tag" that uses (a subset of) the spare bits in the payload // * The remainder of the payload bits (for non-payload cases) // This computes the bits used for the payload tag. BitMask getMultiPayloadTagBitsMask() const { @@ -1235,68 +1058,12 @@ class MultiPayloadEnumTypeInfo: public EnumTypeInfo { payloadTagBits += 1; } BitMask payloadTagBitsMask = spareBitsMask; + payloadTagBitsMask.keepOnlyLeastSignificantBytes(getPayloadSize()); payloadTagBitsMask.keepOnlyMostSignificantBits(payloadTagBits); return payloadTagBitsMask; } }; -// Recursively populate the spare bit mask for this single type -static bool populateSpareBitsMask(const TypeInfo *TI, BitMask &mask, uint64_t mpePointerSpareBits); - -// Recursively populate the spare bit mask for this collection of -// record fields or enum cases. -static bool populateSpareBitsMask(const std::vector &Fields, BitMask &mask, uint64_t mpePointerSpareBits) { - for (auto Field : Fields) { - if (Field.TR != 0) { - BitMask submask(Field.TI.getSize()); - if (!populateSpareBitsMask(&Field.TI, submask, mpePointerSpareBits)) { - return false; - } - mask.andMask(submask, Field.Offset); - } - } - return true; -} - -// General recursive type walk to combine spare bit info from nested structures. -static bool populateSpareBitsMask(const TypeInfo *TI, BitMask &mask, uint64_t mpePointerSpareBits) { - switch (TI->getKind()) { - case TypeInfoKind::Reference: { - if (TI->getSize() == 8) { - mask.andMask(mpePointerSpareBits, 0); - } else /* TI->getSize() == 4 */ { - uint32_t pointerMask = (uint32_t)mpePointerSpareBits; - mask.andMask(pointerMask, 0); - } - break; - } - case TypeInfoKind::Enum: { - auto EnumTI = reinterpret_cast(TI); - // Remove bits used by the payloads - if (!populateSpareBitsMask(EnumTI->getCases(), mask, mpePointerSpareBits)) { - return false; - } - // TODO: Remove bits needed to discriminate payloads. - // Until then, return false for any type with an enum in it so we - // won't claim to support something we don't. - return false; - break; - } - case TypeInfoKind::Record: { - auto RecordTI = dyn_cast(TI); - if (!populateSpareBitsMask(RecordTI->getFields(), mask, mpePointerSpareBits)) { - return false; - } - break; - } - default: { - mask.makeZero(); - break; - } - } - return true; -} - /// Utility class for building values that contain witness tables. class ExistentialTypeInfoBuilder { TypeConverter &TC; @@ -2128,6 +1895,7 @@ class EnumTypeInfoBuilder { return nullptr; } + // Sort and classify the fields for (auto Case : Fields) { if (Case.TR == nullptr) { ++NonPayloadCases; @@ -2306,21 +2074,32 @@ class EnumTypeInfoBuilder { Stride = 1; auto PayloadSize = EnumTypeInfo::getPayloadSizeForCases(Cases); - // If there's a multi-payload enum descriptor, then we - // have spare bits information from the compiler. + // Compute the spare bit mask and determine if we have any address-only fields + auto localSpareBitMask = BitMask::oneMask(Size); + bool hasAddrOnly = false; + for (auto Case : Cases) { + if (Case.TR != 0) { + auto submask = Case.TI.getSpareBits(TC, hasAddrOnly); + localSpareBitMask.andMask(submask, 0); + } + } + + // See if we have MPE bit mask information from the compiler... + // TODO: drop this? // Uncomment the following line to dump the MPE section every time we come through here... //TC.getBuilder().dumpMultiPayloadEnumSection(std::cerr); // DEBUG helper auto MPEDescriptor = TC.getBuilder().getMultiPayloadEnumDescriptor(TR); if (MPEDescriptor && MPEDescriptor->usesPayloadSpareBits()) { + // We found compiler-provided spare bit data... auto PayloadSpareBitMaskByteCount = MPEDescriptor->getPayloadSpareBitMaskByteCount(); auto PayloadSpareBitMaskByteOffset = MPEDescriptor->getPayloadSpareBitMaskByteOffset(); auto SpareBitMask = MPEDescriptor->getPayloadSpareBits(); - BitMask spareBitsMask(PayloadSize, SpareBitMask, + BitMask compilerSpareBitMask(PayloadSize, SpareBitMask, PayloadSpareBitMaskByteCount, PayloadSpareBitMaskByteOffset); - if (spareBitsMask.isZero()) { + if (compilerSpareBitMask.isZero() || hasAddrOnly) { // If there are no spare bits, use the "simple" tag-only implementation. return TC.makeTypeInfo( Size, Alignment, Stride, NumExtraInhabitants, @@ -2328,58 +2107,29 @@ class EnumTypeInfoBuilder { } #if 0 // TODO: This should be !defined(NDEBUG) - // DEBUG verification that compiler mask and locally-computed - // mask are the same (whenever both are available). - BitMask locallyComputedSpareBitsMask(PayloadSize); - auto mpePointerSpareBits = TC.getBuilder().getMultiPayloadEnumPointerMask(); - auto locallyComputedSpareBitsMaskIsValid - = populateSpareBitsMask(Cases, locallyComputedSpareBitsMask, mpePointerSpareBits); - // If the local computation were always correct, we could: - // assert(locallyComputedSpareBitsMaskIsValid); - if (locallyComputedSpareBitsMaskIsValid) { - // Whenever the compiler and local computation both produce - // data, they should agree. - // TODO: Make this true, then change `#if 0` above - assert(locallyComputedSpareBitsMask == spareBitsMask); - } + // Verify that compiler provided and local spare bit info agree... + // TODO: If we could make this actually work, then we wouldn't need the + // bulky compiler-provided info, would we? + assert(localSpareBitMask == compilerSpareBitMask); #endif // Use compiler-provided spare bit information return TC.makeTypeInfo( Size, Alignment, Stride, NumExtraInhabitants, - BitwiseTakable, Cases, spareBitsMask, + BitwiseTakable, Cases, compilerSpareBitMask, EffectivePayloadCases); } - // Either there was no compiler data or it didn't make sense - // (existed but claimed to have no mask). - // Try computing the mask ourselves: This is less robust, but necessary to - // support images from older compilers. - BitMask spareBitsMask(PayloadSize); - auto mpePointerSpareBits = TC.getBuilder().getMultiPayloadEnumPointerMask(); - auto validSpareBitsMask = populateSpareBitsMask(Cases, spareBitsMask, mpePointerSpareBits); - // For DEBUGGING, disable fallback to local computation to - // make missing compiler data more obvious: - // validSpareBitsMask = false; - if (!validSpareBitsMask) { - // If we couldn't correctly determine the spare bits mask, - // return a TI that will always fail when asked for XIs or value. - return TC.makeTypeInfo( - Size, Alignment, Stride, NumExtraInhabitants, - BitwiseTakable, EnumKind::MultiPayloadEnum, Cases); - } else if (spareBitsMask.isZero()) { + if (localSpareBitMask.isZero() || hasAddrOnly) { // Simple case that does not use spare bits - // This is correct as long as our local spare bits calculation - // above only returns an empty mask when the mask is really empty, return TC.makeTypeInfo( Size, Alignment, Stride, NumExtraInhabitants, BitwiseTakable, Cases, EffectivePayloadCases); } else { // General case can mix spare bits and extra discriminator - // It obviously relies on having an accurate spare bit mask. return TC.makeTypeInfo( Size, Alignment, Stride, NumExtraInhabitants, - BitwiseTakable, Cases, spareBitsMask, + BitwiseTakable, Cases, localSpareBitMask, EffectivePayloadCases); } } diff --git a/validation-test/Reflection/reflect_Enum_value.swift b/validation-test/Reflection/reflect_Enum_value.swift index bf5bcbe1e2c2f..d39dd100d8bb8 100644 --- a/validation-test/Reflection/reflect_Enum_value.swift +++ b/validation-test/Reflection/reflect_Enum_value.swift @@ -364,6 +364,13 @@ case leafE case leafF } +reflect(enumValue: OneIndirectPayload.child(.child(.leafF))) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_value.OneIndirectPayload) +// CHECK-NEXT: Value: .child(.child(.leafF)) + reflect(enumValue: OneIndirectPayload.child(.leafF)) // CHECK: Reflecting an enum value. @@ -378,6 +385,23 @@ reflect(enumValue: OneIndirectPayload.leafF) // CHECK-NEXT: (enum reflect_Enum_value.OneIndirectPayload) // CHECK-NEXT: Value: .leafF +enum ADT { + case A + case B(Int) +} + +enum GuineaPig { + case a + indirect case b(ADT) +} + +reflect(enumValue: GuineaPig.b(ADT.B(42))) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_value.GuineaPig) +// CHECK-NEXT: Value: .b(.B(_)) + class SimpleSwiftClass { let value = 7 } diff --git a/validation-test/Reflection/reflect_Enum_values5.swift b/validation-test/Reflection/reflect_Enum_values5.swift new file mode 100644 index 0000000000000..3e191958d17fd --- /dev/null +++ b/validation-test/Reflection/reflect_Enum_values5.swift @@ -0,0 +1,63 @@ +// RUN: %empty-directory(%t) +// RUN: %target-build-swift -lswiftSwiftReflectionTest %s -o %t/reflect_Enum_values5 +// RUN: %target-codesign %t/reflect_Enum_values5 + +// RUN: %target-run %target-swift-reflection-test %t/reflect_Enum_values5 | tee /dev/stderr | %FileCheck %s --dump-input=fail + +// REQUIRES: objc_interop +// REQUIRES: executable_test +// UNSUPPORTED: use_os_stdlib + +import SwiftReflectionTest +public enum E: Error { + public struct Context { + public let a: [any CodingKey] = [] + public let b: String = "abc" + public let c: Error? = nil + } + + case typeMismatch(Any.Type) + case valueNotFound(Any.Type, Context) + case keyNotFound(any CodingKey, Context) + case dataCorrupted(Context) +} + +public enum M { +case A(E) +case B(E, Int) +} + +reflect(enumValue: M.A(.typeMismatch(Int.self))) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values5.M) +// CHECK-NEXT: Value: .A(.typeMismatch(_)) + +reflect(enumValue: M.A(.dataCorrupted(.init()))) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values5.M) +// CHECK-NEXT: Value: .A(.dataCorrupted(_)) + +reflect(enumValue: M.B(.typeMismatch(Int.self), 74)) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values5.M) +// Note: reflection tester only drills down into +// payloads that are a single enum. This payload is a tuple. +// CHECK-NEXT: Value: .B(_) + +reflect(enumValue: M.B(.dataCorrupted(.init()), 42)) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values5.M) +// CHECK-NEXT: Value: .B(_) + +doneReflecting() + +// CHECK: Done. + diff --git a/validation-test/Reflection/reflect_Enum_values6.swift b/validation-test/Reflection/reflect_Enum_values6.swift new file mode 100644 index 0000000000000..13b34d36a09fd --- /dev/null +++ b/validation-test/Reflection/reflect_Enum_values6.swift @@ -0,0 +1,82 @@ +// RUN: %empty-directory(%t) +// RUN: %target-build-swift -lswiftSwiftReflectionTest %s -o %t/reflect_Enum_values6 +// RUN: %target-codesign %t/reflect_Enum_values6 + +// RUN: %target-run %target-swift-reflection-test %t/reflect_Enum_values6 | tee /dev/stderr | %FileCheck %s --dump-input=fail + +// REQUIRES: objc_interop +// REQUIRES: executable_test +// UNSUPPORTED: use_os_stdlib + +import SwiftReflectionTest + +public struct MyError : Error {} + +public enum E1 { +case A(Error) +case B(Error) +} + +// MemoryLayout.size == 8 ==> Error has spare bits + +reflect(enumValue: E1.A(MyError())) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values6.E1) +// CHECK-NEXT: Value: .A(_) + +reflect(enumValue: E1.B(MyError())) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values6.E1) +// CHECK-NEXT: Value: .B(_) + +public enum E2 { +case A(Error?) +case B(Error?) +} + +// MemoryLayout.size == 9 => Error? has no spare bits + +reflect(enumValue: E2.A(MyError())) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values6.E2) +// CHECK-NEXT: Value: .A(.some(_)) + +reflect(enumValue: E2.B(MyError())) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values6.E2) +// CHECK-NEXT: Value: .B(.some(_)) + + +public enum E3 { +case A(Any.Type) +case B(Any.Type) +} + +// MemoryLayout.size == 8 => Any.Type has spare bits + +reflect(enumValue: E3.A(Any.self)) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values6.E3) +// CHECK-NEXT: Value: .A(_) + +reflect(enumValue: E3.B(Any.self)) + +// CHECK: Reflecting an enum value. +// CHECK-NEXT: Type reference: +// CHECK-NEXT: (enum reflect_Enum_values6.E3) +// CHECK-NEXT: Value: .B(_) + +doneReflecting() + +// CHECK: Done. + From bb3b89f487ebdf7345bb4706fa03f1d4eef78b5f Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Tue, 14 May 2024 17:05:26 -0700 Subject: [PATCH 2/2] Use old LLVM method here --- stdlib/public/RemoteInspection/TypeLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/public/RemoteInspection/TypeLowering.cpp b/stdlib/public/RemoteInspection/TypeLowering.cpp index 68cdd2a1e0397..5cd50f8d611cb 100644 --- a/stdlib/public/RemoteInspection/TypeLowering.cpp +++ b/stdlib/public/RemoteInspection/TypeLowering.cpp @@ -247,7 +247,7 @@ BuiltinTypeInfo::BuiltinTypeInfo(TypeRefBuilder &builder, // Returns 0 if this isn't an Int static unsigned isIntType(std::string name) { llvm::StringRef nameRef(name); - if (nameRef.starts_with("Bi") && nameRef.ends_with("_")) { + if (nameRef.starts_with("Bi") && nameRef.endswith("_")) { llvm::StringRef naturalRef = nameRef.drop_front(2).drop_back(); uint8_t natural; if (naturalRef.getAsInteger(10, natural)) {