| //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Source locations are stored pervasively in the AST, making up a third of |
| // the size of typical serialized files. Storing them efficiently is important. |
| // |
| // We use integers optimized by VBR-encoding, because: |
| // - when abbreviations cannot be used, VBR6 encoding is our only choice |
| // - in the worst case a SourceLocation can be ~any 32-bit number, but in |
| // practice they are highly predictable |
| // |
| // We encode the integer so that likely values encode as small numbers that |
| // turn into few VBR chunks: |
| // - the invalid sentinel location is a very common value: it encodes as 0 |
| // - the "macro or not" bit is stored at the bottom of the integer |
| // (rather than at the top, as in memory), so macro locations can have |
| // small representations. |
| // - related locations (e.g. of a left and right paren pair) are usually |
| // similar, so when encoding a sequence of locations we store only |
| // differences between successive elements. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include <climits> |
| #include "clang/Basic/SourceLocation.h" |
| |
| #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H |
| #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H |
| |
| namespace clang { |
| class SourceLocationSequence; |
| |
| /// Serialized encoding of SourceLocations without context. |
| /// Optimized to have small unsigned values (=> small after VBR encoding). |
| /// |
| // Macro locations have the top bit set, we rotate by one so it is the low bit. |
| class SourceLocationEncoding { |
| using UIntTy = SourceLocation::UIntTy; |
| constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy); |
| |
| static UIntTy encodeRaw(UIntTy Raw) { |
| return (Raw << 1) | (Raw >> (UIntBits - 1)); |
| } |
| static UIntTy decodeRaw(UIntTy Raw) { |
| return (Raw >> 1) | (Raw << (UIntBits - 1)); |
| } |
| friend SourceLocationSequence; |
| |
| public: |
| static uint64_t encode(SourceLocation Loc, |
| SourceLocationSequence * = nullptr); |
| static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr); |
| }; |
| |
| /// Serialized encoding of a sequence of SourceLocations. |
| /// |
| /// Optimized to produce small values when locations with the sequence are |
| /// similar. Each element can be delta-encoded against the last nonzero element. |
| /// |
| /// Sequences should be started by creating a SourceLocationSequence::State, |
| /// and then passed around as SourceLocationSequence*. Example: |
| /// |
| /// // establishes a sequence |
| /// void EmitTopLevelThing() { |
| /// SourceLocationSequence::State Seq; |
| /// EmitContainedThing(Seq); |
| /// EmitRecursiveThing(Seq); |
| /// } |
| /// |
| /// // optionally part of a sequence |
| /// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) { |
| /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); |
| /// } |
| /// |
| /// // establishes a sequence if there isn't one already |
| /// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) { |
| /// SourceLocationSequence::State Seq(ParentSeq); |
| /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); |
| /// EmitRecursiveThing(Seq); |
| /// } |
| /// |
| class SourceLocationSequence { |
| using UIntTy = SourceLocation::UIntTy; |
| using EncodedTy = uint64_t; |
| constexpr static auto UIntBits = SourceLocationEncoding::UIntBits; |
| static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!"); |
| |
| // Prev stores the rotated last nonzero location. |
| UIntTy &Prev; |
| |
| // Zig-zag encoding turns small signed integers into small unsigned integers. |
| // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ... |
| static UIntTy zigZag(UIntTy V) { |
| UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0); |
| return Sign ^ (V << 1); |
| } |
| static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); } |
| |
| SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {} |
| |
| EncodedTy encodeRaw(UIntTy Raw) { |
| if (Raw == 0) |
| return 0; |
| UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw); |
| if (Prev == 0) |
| return Prev = Rotated; |
| UIntTy Delta = Rotated - Prev; |
| Prev = Rotated; |
| // Exactly one 33 bit value is possible! (1 << 32). |
| // This is because we have two representations of zero: trivial & relative. |
| return 1 + EncodedTy{zigZag(Delta)}; |
| } |
| UIntTy decodeRaw(EncodedTy Encoded) { |
| if (Encoded == 0) |
| return 0; |
| if (Prev == 0) |
| return SourceLocationEncoding::decodeRaw(Prev = Encoded); |
| return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1)); |
| } |
| |
| public: |
| SourceLocation decode(EncodedTy Encoded) { |
| return SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); |
| } |
| EncodedTy encode(SourceLocation Loc) { |
| return encodeRaw(Loc.getRawEncoding()); |
| } |
| |
| class State; |
| }; |
| |
| /// This object establishes a SourceLocationSequence. |
| class SourceLocationSequence::State { |
| UIntTy Prev = 0; |
| SourceLocationSequence Seq; |
| |
| public: |
| // If Parent is provided and non-null, then this root becomes part of that |
| // enclosing sequence instead of establishing a new one. |
| State(SourceLocationSequence *Parent = nullptr) |
| : Seq(Parent ? Parent->Prev : Prev) {} |
| |
| // Implicit conversion for uniform use of roots vs propagated sequences. |
| operator SourceLocationSequence *() { return &Seq; } |
| }; |
| |
| inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc, |
| SourceLocationSequence *Seq) { |
| return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); |
| } |
| inline SourceLocation |
| SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) { |
| return Seq ? Seq->decode(Encoded) |
| : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); |
| } |
| |
| } // namespace clang |
| #endif |