| //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H |
| #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H |
| |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/MC/MCExpr.h" |
| #include "llvm/MC/MCInstrInfo.h" |
| #include "llvm/MC/MCParser/MCAsmLexer.h" |
| #include "llvm/MC/MCParser/MCParsedAsmOperand.h" |
| #include "llvm/MC/MCParser/MCAsmParserExtension.h" |
| #include "llvm/MC/MCTargetOptions.h" |
| #include "llvm/MC/SubtargetFeature.h" |
| #include "llvm/Support/SMLoc.h" |
| #include <cstdint> |
| #include <memory> |
| |
| namespace llvm { |
| |
| class MCInst; |
| class MCStreamer; |
| class MCSubtargetInfo; |
| template <typename T> class SmallVectorImpl; |
| |
| using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; |
| |
| enum AsmRewriteKind { |
| AOK_Align, // Rewrite align as .align. |
| AOK_EVEN, // Rewrite even as .even. |
| AOK_Emit, // Rewrite _emit as .byte. |
| AOK_CallInput, // Rewrite in terms of ${N:P}. |
| AOK_Input, // Rewrite in terms of $N. |
| AOK_Output, // Rewrite in terms of $N. |
| AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). |
| AOK_Label, // Rewrite local labels. |
| AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). |
| AOK_Skip, // Skip emission (e.g., offset/type operators). |
| AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp] |
| }; |
| |
| const char AsmRewritePrecedence [] = { |
| 2, // AOK_Align |
| 2, // AOK_EVEN |
| 2, // AOK_Emit |
| 3, // AOK_Input |
| 3, // AOK_CallInput |
| 3, // AOK_Output |
| 5, // AOK_SizeDirective |
| 1, // AOK_Label |
| 5, // AOK_EndOfStatement |
| 2, // AOK_Skip |
| 2 // AOK_IntelExpr |
| }; |
| |
| // Represnt the various parts which makes up an intel expression, |
| // used for emitting compound intel expressions |
| struct IntelExpr { |
| bool NeedBracs; |
| int64_t Imm; |
| StringRef BaseReg; |
| StringRef IndexReg; |
| StringRef OffsetName; |
| unsigned Scale; |
| |
| IntelExpr() |
| : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()), |
| OffsetName(StringRef()), Scale(1) {} |
| // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] |
| IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, |
| StringRef offsetName, int64_t imm, bool needBracs) |
| : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg), |
| OffsetName(offsetName), Scale(1) { |
| if (scale) |
| Scale = scale; |
| } |
| bool hasBaseReg() const { return !BaseReg.empty(); } |
| bool hasIndexReg() const { return !IndexReg.empty(); } |
| bool hasRegs() const { return hasBaseReg() || hasIndexReg(); } |
| bool hasOffset() const { return !OffsetName.empty(); } |
| // Normally we won't emit immediates unconditionally, |
| // unless we've got no other components |
| bool emitImm() const { return !(hasRegs() || hasOffset()); } |
| bool isValid() const { |
| return (Scale == 1) || |
| (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); |
| } |
| }; |
| |
| struct AsmRewrite { |
| AsmRewriteKind Kind; |
| SMLoc Loc; |
| unsigned Len; |
| bool Done; |
| int64_t Val; |
| StringRef Label; |
| IntelExpr IntelExp; |
| |
| public: |
| AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0) |
| : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {} |
| AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) |
| : AsmRewrite(kind, loc, len) { Label = label; } |
| AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) |
| : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } |
| }; |
| |
| struct ParseInstructionInfo { |
| SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; |
| |
| ParseInstructionInfo() = default; |
| ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) |
| : AsmRewrites(rewrites) {} |
| }; |
| |
| enum OperandMatchResultTy { |
| MatchOperand_Success, // operand matched successfully |
| MatchOperand_NoMatch, // operand did not match |
| MatchOperand_ParseFail // operand matched but had errors |
| }; |
| |
| enum class DiagnosticPredicateTy { |
| Match, |
| NearMatch, |
| NoMatch, |
| }; |
| |
| // When an operand is parsed, the assembler will try to iterate through a set of |
| // possible operand classes that the operand might match and call the |
| // corresponding PredicateMethod to determine that. |
| // |
| // If there are two AsmOperands that would give a specific diagnostic if there |
| // is no match, there is currently no mechanism to distinguish which operand is |
| // a closer match. The DiagnosticPredicate distinguishes between 'completely |
| // no match' and 'near match', so the assembler can decide whether to give a |
| // specific diagnostic, or use 'InvalidOperand' and continue to find a |
| // 'better matching' diagnostic. |
| // |
| // For example: |
| // opcode opnd0, onpd1, opnd2 |
| // |
| // where: |
| // opnd2 could be an 'immediate of range [-8, 7]' |
| // opnd2 could be a 'register + shift/extend'. |
| // |
| // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes |
| // little sense to give a diagnostic that the operand should be an immediate |
| // in range [-8, 7]. |
| // |
| // This is a light-weight alternative to the 'NearMissInfo' approach |
| // below which collects *all* possible diagnostics. This alternative |
| // is optional and fully backward compatible with existing |
| // PredicateMethods that return a 'bool' (match or no match). |
| struct DiagnosticPredicate { |
| DiagnosticPredicateTy Type; |
| |
| explicit DiagnosticPredicate(bool Match) |
| : Type(Match ? DiagnosticPredicateTy::Match |
| : DiagnosticPredicateTy::NearMatch) {} |
| DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {} |
| DiagnosticPredicate(const DiagnosticPredicate &) = default; |
| DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default; |
| |
| operator bool() const { return Type == DiagnosticPredicateTy::Match; } |
| bool isMatch() const { return Type == DiagnosticPredicateTy::Match; } |
| bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; } |
| bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; } |
| }; |
| |
| // When matching of an assembly instruction fails, there may be multiple |
| // encodings that are close to being a match. It's often ambiguous which one |
| // the programmer intended to use, so we want to report an error which mentions |
| // each of these "near-miss" encodings. This struct contains information about |
| // one such encoding, and why it did not match the parsed instruction. |
| class NearMissInfo { |
| public: |
| enum NearMissKind { |
| NoNearMiss, |
| NearMissOperand, |
| NearMissFeature, |
| NearMissPredicate, |
| NearMissTooFewOperands, |
| }; |
| |
| // The encoding is valid for the parsed assembly string. This is only used |
| // internally to the table-generated assembly matcher. |
| static NearMissInfo getSuccess() { return NearMissInfo(); } |
| |
| // The instruction encoding is not valid because it requires some target |
| // features that are not currently enabled. MissingFeatures has a bit set for |
| // each feature that the encoding needs but which is not enabled. |
| static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) { |
| NearMissInfo Result; |
| Result.Kind = NearMissFeature; |
| Result.Features = MissingFeatures; |
| return Result; |
| } |
| |
| // The instruction encoding is not valid because the target-specific |
| // predicate function returned an error code. FailureCode is the |
| // target-specific error code returned by the predicate. |
| static NearMissInfo getMissedPredicate(unsigned FailureCode) { |
| NearMissInfo Result; |
| Result.Kind = NearMissPredicate; |
| Result.PredicateError = FailureCode; |
| return Result; |
| } |
| |
| // The instruction encoding is not valid because one (and only one) parsed |
| // operand is not of the correct type. OperandError is the error code |
| // relating to the operand class expected by the encoding. OperandClass is |
| // the type of the expected operand. Opcode is the opcode of the encoding. |
| // OperandIndex is the index into the parsed operand list. |
| static NearMissInfo getMissedOperand(unsigned OperandError, |
| unsigned OperandClass, unsigned Opcode, |
| unsigned OperandIndex) { |
| NearMissInfo Result; |
| Result.Kind = NearMissOperand; |
| Result.MissedOperand.Error = OperandError; |
| Result.MissedOperand.Class = OperandClass; |
| Result.MissedOperand.Opcode = Opcode; |
| Result.MissedOperand.Index = OperandIndex; |
| return Result; |
| } |
| |
| // The instruction encoding is not valid because it expects more operands |
| // than were parsed. OperandClass is the class of the expected operand that |
| // was not provided. Opcode is the instruction encoding. |
| static NearMissInfo getTooFewOperands(unsigned OperandClass, |
| unsigned Opcode) { |
| NearMissInfo Result; |
| Result.Kind = NearMissTooFewOperands; |
| Result.TooFewOperands.Class = OperandClass; |
| Result.TooFewOperands.Opcode = Opcode; |
| return Result; |
| } |
| |
| operator bool() const { return Kind != NoNearMiss; } |
| |
| NearMissKind getKind() const { return Kind; } |
| |
| // Feature flags required by the instruction, that the current target does |
| // not have. |
| const FeatureBitset& getFeatures() const { |
| assert(Kind == NearMissFeature); |
| return Features; |
| } |
| // Error code returned by the target predicate when validating this |
| // instruction encoding. |
| unsigned getPredicateError() const { |
| assert(Kind == NearMissPredicate); |
| return PredicateError; |
| } |
| // MatchClassKind of the operand that we expected to see. |
| unsigned getOperandClass() const { |
| assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); |
| return MissedOperand.Class; |
| } |
| // Opcode of the encoding we were trying to match. |
| unsigned getOpcode() const { |
| assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); |
| return MissedOperand.Opcode; |
| } |
| // Error code returned when validating the operand. |
| unsigned getOperandError() const { |
| assert(Kind == NearMissOperand); |
| return MissedOperand.Error; |
| } |
| // Index of the actual operand we were trying to match in the list of parsed |
| // operands. |
| unsigned getOperandIndex() const { |
| assert(Kind == NearMissOperand); |
| return MissedOperand.Index; |
| } |
| |
| private: |
| NearMissKind Kind; |
| |
| // These two structs share a common prefix, so we can safely rely on the fact |
| // that they overlap in the union. |
| struct MissedOpInfo { |
| unsigned Class; |
| unsigned Opcode; |
| unsigned Error; |
| unsigned Index; |
| }; |
| |
| struct TooFewOperandsInfo { |
| unsigned Class; |
| unsigned Opcode; |
| }; |
| |
| union { |
| FeatureBitset Features; |
| unsigned PredicateError; |
| MissedOpInfo MissedOperand; |
| TooFewOperandsInfo TooFewOperands; |
| }; |
| |
| NearMissInfo() : Kind(NoNearMiss) {} |
| }; |
| |
| /// MCTargetAsmParser - Generic interface to target specific assembly parsers. |
| class MCTargetAsmParser : public MCAsmParserExtension { |
| public: |
| enum MatchResultTy { |
| Match_InvalidOperand, |
| Match_InvalidTiedOperand, |
| Match_MissingFeature, |
| Match_MnemonicFail, |
| Match_Success, |
| Match_NearMisses, |
| FIRST_TARGET_MATCH_RESULT_TY |
| }; |
| |
| protected: // Can only create subclasses. |
| MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, |
| const MCInstrInfo &MII); |
| |
| /// Create a copy of STI and return a non-const reference to it. |
| MCSubtargetInfo ©STI(); |
| |
| /// AvailableFeatures - The current set of available features. |
| FeatureBitset AvailableFeatures; |
| |
| /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly? |
| bool ParsingMSInlineAsm = false; |
| |
| /// SemaCallback - The Sema callback implementation. Must be set when parsing |
| /// ms-style inline assembly. |
| MCAsmParserSemaCallback *SemaCallback = nullptr; |
| |
| /// Set of options which affects instrumentation of inline assembly. |
| MCTargetOptions MCOptions; |
| |
| /// Current STI. |
| const MCSubtargetInfo *STI; |
| |
| const MCInstrInfo &MII; |
| |
| public: |
| MCTargetAsmParser(const MCTargetAsmParser &) = delete; |
| MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; |
| |
| ~MCTargetAsmParser() override; |
| |
| const MCSubtargetInfo &getSTI() const; |
| |
| const FeatureBitset& getAvailableFeatures() const { |
| return AvailableFeatures; |
| } |
| void setAvailableFeatures(const FeatureBitset& Value) { |
| AvailableFeatures = Value; |
| } |
| |
| bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; } |
| void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; } |
| |
| MCTargetOptions getTargetOptions() const { return MCOptions; } |
| |
| void setSemaCallback(MCAsmParserSemaCallback *Callback) { |
| SemaCallback = Callback; |
| } |
| |
| // Target-specific parsing of expression. |
| virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { |
| return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); |
| } |
| |
| virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, |
| SMLoc &EndLoc) = 0; |
| |
| /// tryParseRegister - parse one register if possible |
| /// |
| /// Check whether a register specification can be parsed at the current |
| /// location, without failing the entire parse if it can't. Must not consume |
| /// tokens if the parse fails. |
| virtual OperandMatchResultTy |
| tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) = 0; |
| |
| /// ParseInstruction - Parse one assembly instruction. |
| /// |
| /// The parser is positioned following the instruction name. The target |
| /// specific instruction parser should parse the entire instruction and |
| /// construct the appropriate MCInst, or emit an error. On success, the entire |
| /// line should be parsed up to and including the end-of-statement token. On |
| /// failure, the parser is not required to read to the end of the line. |
| // |
| /// \param Name - The instruction name. |
| /// \param NameLoc - The source location of the name. |
| /// \param Operands [out] - The list of parsed operands, this returns |
| /// ownership of them to the caller. |
| /// \return True on failure. |
| virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, |
| SMLoc NameLoc, OperandVector &Operands) = 0; |
| virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, |
| AsmToken Token, OperandVector &Operands) { |
| return ParseInstruction(Info, Name, Token.getLoc(), Operands); |
| } |
| |
| /// ParseDirective - Parse a target specific assembler directive |
| /// |
| /// The parser is positioned following the directive name. The target |
| /// specific directive parser should parse the entire directive doing or |
| /// recording any target specific work, or return true and do nothing if the |
| /// directive is not target specific. If the directive is specific for |
| /// the target, the entire line is parsed up to and including the |
| /// end-of-statement token and false is returned. |
| /// |
| /// \param DirectiveID - the identifier token of the directive. |
| virtual bool ParseDirective(AsmToken DirectiveID) = 0; |
| |
| /// MatchAndEmitInstruction - Recognize a series of operands of a parsed |
| /// instruction as an actual MCInst and emit it to the specified MCStreamer. |
| /// This returns false on success and returns true on failure to match. |
| /// |
| /// On failure, the target parser is responsible for emitting a diagnostic |
| /// explaining the match failure. |
| virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
| OperandVector &Operands, MCStreamer &Out, |
| uint64_t &ErrorInfo, |
| bool MatchingInlineAsm) = 0; |
| |
| /// Allows targets to let registers opt out of clobber lists. |
| virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } |
| |
| /// Allow a target to add special case operand matching for things that |
| /// tblgen doesn't/can't handle effectively. For example, literal |
| /// immediates on ARM. TableGen expects a token operand, but the parser |
| /// will recognize them as immediates. |
| virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, |
| unsigned Kind) { |
| return Match_InvalidOperand; |
| } |
| |
| /// Validate the instruction match against any complex target predicates |
| /// before rendering any operands to it. |
| virtual unsigned |
| checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { |
| return Match_Success; |
| } |
| |
| /// checkTargetMatchPredicate - Validate the instruction match against |
| /// any complex target predicates not expressible via match classes. |
| virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { |
| return Match_Success; |
| } |
| |
| virtual void convertToMapAndConstraints(unsigned Kind, |
| const OperandVector &Operands) = 0; |
| |
| /// Returns whether two registers are equal and is used by the tied-operands |
| /// checks in the AsmMatcher. This method can be overridden allow e.g. a |
| /// sub- or super-register as the tied operand. |
| virtual bool regsEqual(const MCParsedAsmOperand &Op1, |
| const MCParsedAsmOperand &Op2) const { |
| assert(Op1.isReg() && Op2.isReg() && "Operands not all regs"); |
| return Op1.getReg() == Op2.getReg(); |
| } |
| |
| // Return whether this parser uses assignment statements with equals tokens |
| virtual bool equalIsAsmAssignment() { return true; }; |
| // Return whether this start of statement identifier is a label |
| virtual bool isLabel(AsmToken &Token) { return true; }; |
| // Return whether this parser accept star as start of statement |
| virtual bool starIsStartOfStatement() { return false; }; |
| |
| virtual const MCExpr *applyModifierToExpr(const MCExpr *E, |
| MCSymbolRefExpr::VariantKind, |
| MCContext &Ctx) { |
| return nullptr; |
| } |
| |
| // For actions that have to be performed before a label is emitted |
| virtual void doBeforeLabelEmit(MCSymbol *Symbol) {} |
| |
| virtual void onLabelParsed(MCSymbol *Symbol) {} |
| |
| /// Ensure that all previously parsed instructions have been emitted to the |
| /// output streamer, if the target does not emit them immediately. |
| virtual void flushPendingInstructions(MCStreamer &Out) {} |
| |
| virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, |
| AsmToken::TokenKind OperatorToken, |
| MCContext &Ctx) { |
| return nullptr; |
| } |
| |
| // For any checks or cleanups at the end of parsing. |
| virtual void onEndOfFile() {} |
| }; |
| |
| } // end namespace llvm |
| |
| #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H |