blob: ff16a122e4960d7958362b7ce1edf246b5c42494 [file] [log] [blame]
//===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Source locations are stored pervasively in the AST, making up a third of
// the size of typical serialized files. Storing them efficiently is important.
//
// We use integers optimized by VBR-encoding, because:
// - when abbrevations cannot be used, VBR6 encoding is our only choice
// - in the worst case a SourceLocation can be ~any 32-bit number, but in
// practice they are highly predictable
//
// We encode the integer so that likely values encode as small numbers that
// turn into few VBR chunks:
// - the invalid sentinel location is a very common value: it encodes as 0
// - the "macro or not" bit is stored at the bottom of the integer
// (rather than at the top, as in memory), so macro locations can have
// small representations.
// - related locations (e.g. of a left and right paren pair) are usually
// similar, so when encoding a sequence of locations we store only
// differences between successive elements.
//
//===----------------------------------------------------------------------===//
#include <climits>
#include "clang/Basic/SourceLocation.h"
#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
namespace clang {
class SourceLocationSequence;
/// Serialized encoding of SourceLocations without context.
/// Optimized to have small unsigned values (=> small after VBR encoding).
///
// Macro locations have the top bit set, we rotate by one so it is the low bit.
class SourceLocationEncoding {
using UIntTy = SourceLocation::UIntTy;
constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
static UIntTy encodeRaw(UIntTy Raw) {
return (Raw << 1) | (Raw >> (UIntBits - 1));
}
static UIntTy decodeRaw(UIntTy Raw) {
return (Raw >> 1) | (Raw << (UIntBits - 1));
}
friend SourceLocationSequence;
public:
static uint64_t encode(SourceLocation Loc,
SourceLocationSequence * = nullptr);
static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
};
/// Serialized encoding of a sequence of SourceLocations.
///
/// Optimized to produce small values when locations with the sequence are
/// similar. Each element can be delta-encoded against the last nonzero element.
///
/// Sequences should be started by creating a SourceLocationSequence::State,
/// and then passed around as SourceLocationSequence*. Example:
///
/// // establishes a sequence
/// void EmitTopLevelThing() {
/// SourceLocationSequence::State Seq;
/// EmitContainedThing(Seq);
/// EmitRecursiveThing(Seq);
/// }
///
/// // optionally part of a sequence
/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
/// }
///
/// // establishes a sequence if there isn't one already
/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
/// SourceLocationSequence::State Seq(ParentSeq);
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
/// EmitRecursiveThing(Seq);
/// }
///
class SourceLocationSequence {
using UIntTy = SourceLocation::UIntTy;
using EncodedTy = uint64_t;
constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
// Prev stores the rotated last nonzero location.
UIntTy &Prev;
// Zig-zag encoding turns small signed integers into small unsigned integers.
// 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
static UIntTy zigZag(UIntTy V) {
UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
return Sign ^ (V << 1);
}
static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
EncodedTy encodeRaw(UIntTy Raw) {
if (Raw == 0)
return 0;
UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
if (Prev == 0)
return Prev = Rotated;
UIntTy Delta = Rotated - Prev;
Prev = Rotated;
// Exactly one 33 bit value is possible! (1 << 32).
// This is because we have two representations of zero: trivial & relative.
return 1 + EncodedTy{zigZag(Delta)};
}
UIntTy decodeRaw(EncodedTy Encoded) {
if (Encoded == 0)
return 0;
if (Prev == 0)
return SourceLocationEncoding::decodeRaw(Prev = Encoded);
return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
}
public:
SourceLocation decode(EncodedTy Encoded) {
return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
}
EncodedTy encode(SourceLocation Loc) {
return encodeRaw(Loc.getRawEncoding());
}
class State;
};
/// This object establishes a SourceLocationSequence.
class SourceLocationSequence::State {
UIntTy Prev = 0;
SourceLocationSequence Seq;
public:
// If Parent is provided and non-null, then this root becomes part of that
// enclosing sequence instead of establishing a new one.
State(SourceLocationSequence *Parent = nullptr)
: Seq(Parent ? Parent->Prev : Prev) {}
// Implicit conversion for uniform use of roots vs propagated sequences.
operator SourceLocationSequence *() { return &Seq; }
};
inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
SourceLocationSequence *Seq) {
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
}
inline SourceLocation
SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
return Seq ? Seq->decode(Encoded)
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
}
} // namespace clang
#endif