| /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
| #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_H_ |
| #define TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_H_ |
| |
| #include "tensorflow/compiler/xla/service/heap_simulator.h" |
| #include "tensorflow/compiler/xla/service/hlo_cost_analysis.h" |
| |
| namespace xla { |
| |
| // This class contains pre-set assignments determined by memory space |
| // assignment. It contains two data structures: (1) a chunks vector that maps a |
| // defining HloPosition to a Chunk (offset and size), and (2) a sizes vector |
| // that maps the memory space to its size. If there is only one alternate memory |
| // space like there is currently, there will be one entry in sizes. |
| class PresetAssignments { |
| public: |
| PresetAssignments() = default; |
| |
| void add_chunk(const HloPosition& position, |
| const HeapSimulator::Chunk& chunk) { |
| chunks_.emplace_back(position, chunk); |
| } |
| |
| void add_size(int64 memory_space, int64 size) { |
| sizes_.emplace_back(memory_space, size); |
| } |
| |
| absl::Span<const std::pair<HloPosition, HeapSimulator::Chunk>> chunks() |
| const { |
| return chunks_; |
| } |
| |
| absl::Span<const std::pair<int64, int64>> sizes() const { return sizes_; } |
| |
| // Remove the chunks_ entry that corresponds to instruction. |
| void RemoveAssignmentForInstruction(const HloInstruction* instruction); |
| |
| private: |
| std::vector<std::pair<HloPosition, HeapSimulator::Chunk>> chunks_; |
| std::vector<std::pair<int64, int64>> sizes_; |
| }; |
| |
| // A wrapper class around HloCostAnalysis with additional knowledge about the |
| // bandwidths of different memory spaces. |
| class MemorySpaceAssignmentCostAnalysis { |
| public: |
| MemorySpaceAssignmentCostAnalysis( |
| const HloCostAnalysis& cost_analysis, |
| float async_copy_bandwidth_bytes_per_second, |
| float alternate_mem_bandwidth_bytes_per_second) |
| : cost_analysis_(cost_analysis), |
| async_copy_bandwidth_bytes_per_second_( |
| async_copy_bandwidth_bytes_per_second), |
| alternate_mem_bandwidth_bytes_per_second_( |
| alternate_mem_bandwidth_bytes_per_second) {} |
| |
| const HloCostAnalysis& cost_analysis() const { return cost_analysis_; } |
| |
| // Returns the elapsed time in seconds due to compute only. |
| float GetInstructionElapsedDueToCompute( |
| const HloInstruction& instruction) const; |
| |
| // Returns the elapsed time in seconds due to memory only. If |
| // operand_in_alternate_mem is provided or if output_in_alternate_mem is true, |
| // it will assume that operand or output will be in the alternate memory |
| // space. This is useful for calculating the benefit of placing the buffer in |
| // alternate memory. |
| float GetInstructionElapsedDueToMemory( |
| const HloInstruction& instruction, |
| absl::optional<int64> operand_in_alternate_mem = absl::nullopt, |
| bool output_in_alternate_mem = false) const; |
| |
| // Returns the estimated elapsed duration of the instruction in seconds. It |
| // assumes all operands and outputs of the instruction are in the default |
| // memory, except for the operand number that is in the alternate memory, if |
| // provided, or output if output_in_alternate_mem is true. |
| float GetInstructionElapsed( |
| const HloInstruction& instruction, |
| absl::optional<int64> operand_in_alternate_mem = absl::nullopt, |
| bool output_in_alternate_mem = false) const; |
| |
| // Returns the elapsed time it would take to asynchronously copy the shape |
| // from default to alternate memory space (or vice versa). |
| float GetAsyncCopyElapsed(const Shape& shape) const; |
| |
| private: |
| const HloCostAnalysis& cost_analysis_; |
| float async_copy_bandwidth_bytes_per_second_; |
| float alternate_mem_bandwidth_bytes_per_second_; |
| }; |
| |
| // Abstract base class that memory space assignment uses to pick prefetch |
| // intervals. |
| class PrefetchIntervalPicker { |
| public: |
| PrefetchIntervalPicker() = default; |
| virtual ~PrefetchIntervalPicker() = default; |
| |
| // Sets the instruction schedule. |
| virtual void SetInstructionSchedule( |
| const absl::flat_hash_map<const HloInstruction*, int64>& |
| instruction_schedule) { |
| instruction_schedule_ = &instruction_schedule; |
| } |
| |
| // Returns true if the buffer can be allocated in alternate memory space |
| // without any copies (prefetches). |
| virtual bool CanAllocateInAlternateMemoryNoCopy(const Shape& shape, |
| int64 start_time, |
| int64 end_time) const = 0; |
| |
| // Begins the iterator for the first start time of the prefetch. |
| virtual void Begin(const HloUse& use, int64 start_time, int64 end_time) = 0; |
| |
| // Advances the start time of the prefetch and returns that value. |
| virtual int64 Next() = 0; |
| |
| // Returns true if the available prefetch intervals have been exhausted. |
| virtual bool Done() const = 0; |
| |
| protected: |
| const absl::flat_hash_map<const HloInstruction*, int64>* |
| instruction_schedule_ = nullptr; |
| }; |
| |
| // Prefetch interval picker that uses instruction count to overlap asynchronous |
| // copies with independent computation. The min and max overlap counts describe |
| // the number of independent HLOs overlapped while a value is being prefetched |
| // into the alternate memory (between CopyStart and CopyDone HLO instructions). |
| // max_overlap_count attempts to prevent bringing tensors into the alternate |
| // memory too eagerly and hence occupying the space for other tensors which |
| // might use it. min_overlap_count attempts to prevent cases where tensors are |
| // prefetched into the alternate memory without sufficient time for the copy to |
| // take place. In those cases, it's just better to keep the tensor in the |
| // default memory instead of hurting the critical path with this copy that |
| // likely won't finish in time. |
| class InstructionCountPrefetchIntervalPicker : public PrefetchIntervalPicker { |
| public: |
| InstructionCountPrefetchIntervalPicker(int64 min_overlap_count, |
| int64 max_overlap_count) |
| : min_overlap_count_(min_overlap_count), |
| max_overlap_count_(max_overlap_count) {} |
| |
| bool CanAllocateInAlternateMemoryNoCopy(const Shape& shape, int64 start_time, |
| int64 end_time) const override; |
| |
| void Begin(const HloUse& use, int64 start_time, int64 end_time) override; |
| |
| int64 Next() override; |
| bool Done() const override; |
| |
| private: |
| int64 min_overlap_count_; |
| int64 max_overlap_count_; |
| int64 end_time_; |
| int64 current_prefetch_time_; |
| }; |
| |
| // Prefetch interval picker that uses cost analysis to overlap asynchronous |
| // copies with independent computation. It uses min/max (asynchronous copy |
| // duration) / (independent computation duration) ratios to guide whether the |
| // prefetch is within those bounds. It starts with the maximum allowed ratio |
| // (earliest prefetch) in Begin() and works its way for later and later prefetch |
| // with each Next() call until hitting the minimum ratio, in order not to hurt |
| // the critical path. |
| class CostAnalysisPrefetchIntervalPicker : public PrefetchIntervalPicker { |
| public: |
| CostAnalysisPrefetchIntervalPicker( |
| const MemorySpaceAssignmentCostAnalysis& cost_analysis, |
| float min_async_copy_to_overlap_ratio, |
| float max_async_copy_to_overlap_ratio) |
| : cost_analysis_(cost_analysis), |
| min_async_copy_to_overlap_ratio_(min_async_copy_to_overlap_ratio), |
| max_async_copy_to_overlap_ratio_(max_async_copy_to_overlap_ratio) {} |
| |
| void SetInstructionSchedule( |
| const absl::flat_hash_map<const HloInstruction*, int64>& |
| instruction_schedule) override; |
| |
| bool CanAllocateInAlternateMemoryNoCopy(const Shape& shape, int64 start_time, |
| int64 end_time) const override; |
| |
| void Begin(const HloUse& use, int64 start_time, int64 end_time) override; |
| |
| int64 Next() override; |
| bool Done() const override; |
| |
| private: |
| // Returns the elapsed time in seconds between the logical interval that |
| // corresponds to the instruction schedule. |
| float GetLogicalIntervalElapsed(int64 start_time, int64 end_time) const; |
| |
| // For performance reasons, we calculate the prefix sum of the elapsed time so |
| // that it's efficient to find the elapsed time in seconds in any logical |
| // interval. |
| std::vector<float> elapsed_time_cumsum_; |
| |
| const MemorySpaceAssignmentCostAnalysis& cost_analysis_; |
| float min_async_copy_to_overlap_ratio_; |
| float max_async_copy_to_overlap_ratio_; |
| |
| float async_copy_elapsed_; |
| float inst_elapsed_reduction_; |
| int64 end_logical_time_; |
| int64 current_logical_prefetch_time_; |
| }; |
| |
| // MemorySpaceAssignment assigns memory spaces (default or alternate) to each |
| // instruction in the module. It will greedily try placing as as many values in |
| // the alternate memory space as possible. It uses the heap simulator to |
| // determine the actual allocation offsets of values in the alternate memory |
| // space to account for fragmentation. The default memory space is assumed to be |
| // large enough to hold the values that could not be placed in the alternate |
| // memory space. |
| class MemorySpaceAssignment { |
| public: |
| using Chunk = HeapSimulator::Chunk; |
| using BufferInterval = GlobalDecreasingSizeBestFitHeap::BufferInterval; |
| using BufferIntervalCompare = |
| GlobalDecreasingSizeBestFitHeap::BufferIntervalCompare; |
| |
| // MemorySpaceAssignment uses a notion of a slow and large default memory |
| // space and a fast and small alternate memory space. |
| enum class MemorySpace { kDefault, kAlternate }; |
| |
| // This class represents an allocation that might either be in the default or |
| // alternate memory. An HloValue might live in multiple different allocations |
| // over its lifetime. The lifetimes of the allocations are defined using |
| // start_time and end_time, which corresponds to the instruction indexes in |
| // the flattened schedule. Each of these allocations might partially overlap |
| // with each other. CopyAllocation defined below represents asynchronous |
| // copies between Allocations. |
| // |
| // Consider an instruction Foo, and its users Bar and Baz, and the times given |
| // in terms of the flattened schedule of the entire module: |
| // |
| // Foo:10 |
| // / \ |
| // Bar:14 \ |
| // Baz:25 |
| // |
| // A valid memory space assignment could be like the following: |
| // |
| // Time: 10 ... 14 ... 25 |
| // Foo Bar Baz |
| // Alternate +-------+ +-----+ |
| // Default +---------------------+ |
| // ^ ^ ^ ^ |
| // | | | | |
| // evict evict prefetch prefetch |
| // start end start end |
| // |
| // This would be represented with: |
| // - Allocation(memory_space=kAlternate, start_time=10, end_time=14) |
| // - CopyAllocation(memory_space=kDefault, start_time=12, end_time=25) |
| // - CopyAllocation(memory_space=kAlternate, start_time=22, end_time=25) |
| class Allocation { |
| public: |
| Allocation(HloInstruction* instruction, HloPosition defining_position, |
| MemorySpace memory_space, Chunk chunk, int64 start_time, |
| int64 end_time) |
| : instruction_(instruction), |
| defining_position_(defining_position), |
| memory_space_(memory_space), |
| chunk_(chunk), |
| start_time_(start_time), |
| end_time_(end_time) {} |
| virtual ~Allocation() = default; |
| |
| virtual bool is_copy_allocation() const { return false; } |
| |
| // Adds a use to this allocation. |
| void AddUse(HloUse use); |
| |
| // Extends the end time of this allocation. |
| void Extend(int64 end_time) { end_time_ = end_time; } |
| |
| // After all of the time ranges for the allocations have been assigned, |
| // Process morphs the instructions affected to assign the memory spaces and |
| // insert asynchronous copy instructions if necessary. |
| virtual Status Process(MemorySpaceAssignment* memory_space_assignment); |
| |
| // Returns the instruction that produces this allocation. It might be |
| // different than the instruction in defining_position (e.g., a |
| // GetTupleElement instruction does not define the buffer). |
| virtual HloInstruction* instruction() const { return instruction_; } |
| |
| // Returns the defining position for this allocation. |
| virtual HloPosition defining_position() const { return defining_position_; } |
| |
| const std::vector<HloUse>& uses() const { return uses_; } |
| MemorySpace memory_space() const { return memory_space_; } |
| Chunk chunk() const { return chunk_; } |
| void set_start_time(int64 start_time) { start_time_ = start_time; } |
| int64 start_time() const { return start_time_; } |
| int64 end_time() const { return end_time_; } |
| |
| protected: |
| // Descend to the shape_index element of the tuple and replace that with |
| // new_instruction. |
| StatusOr<HloInstruction*> ReplaceTupleWith(HloInstruction* new_instruction, |
| HloInstruction* tuple, |
| ShapeIndex shape_index); |
| |
| HloInstruction* instruction_; |
| HloPosition defining_position_; |
| std::vector<HloUse> uses_; |
| std::vector<HloInstruction*> bitcasts_; |
| MemorySpace memory_space_; |
| Chunk chunk_; |
| int64 start_time_; |
| int64 end_time_; |
| }; |
| |
| // This class represents an allocation as a result of an asynchronous copy. |
| class CopyAllocation : public Allocation { |
| public: |
| CopyAllocation(const Allocation& prev_allocation, MemorySpace memory_space, |
| Chunk chunk, int64 start_time, int64 end_time) |
| : Allocation(/*instruction=*/nullptr, |
| /*defining_position=*/{nullptr, {}}, memory_space, chunk, |
| start_time, end_time), |
| prev_allocation_(prev_allocation), |
| copy_start_schedule_after_(start_time), |
| copy_done_schedule_before_(end_time) {} |
| |
| bool is_copy_allocation() const override { return true; } |
| |
| Status Process(MemorySpaceAssignment* memory_space_assignment) override; |
| |
| HloInstruction* instruction() const override { |
| // Unless explicitly set, the instruction of a copy allocation in |
| // retrieved from the previous allocation. |
| if (instruction_ != nullptr) { |
| return instruction_; |
| } else { |
| return prev_allocation_.instruction(); |
| } |
| } |
| |
| HloPosition defining_position() const override { |
| // Unless explicitly set, the defining position of a copy allocation in |
| // retrieved from the previous allocation. This is because we don't create |
| // new CopyStart/CopyDone instructions until later and the position should |
| // point to the previous (copy or otherwise) allocation's position for the |
| // original defining position. |
| if (defining_position_.instruction == nullptr) { |
| return prev_allocation_.defining_position(); |
| } else { |
| return defining_position_; |
| } |
| } |
| |
| HloInstruction* copy_start() const { return copy_start_; } |
| HloInstruction* copy_done() const { return copy_done_; } |
| |
| int64 copy_start_schedule_after() const { |
| return copy_start_schedule_after_; |
| } |
| int64 copy_done_schedule_before() const { |
| return copy_done_schedule_before_; |
| } |
| |
| void set_copy_start_schedule_after(int64 copy_start_schedule_after) { |
| copy_start_schedule_after_ = copy_start_schedule_after; |
| } |
| |
| private: |
| const Allocation& prev_allocation_; |
| // These variables define the scheduling boundaries where CopyStart and |
| // CopyDone can be scheduled. The earliest CopyStart can be scheduled is |
| // after copy_start_schedule_after_ and the latest CopyDone can be scheduled |
| // is before copy_done_schedule_before_. |
| int64 copy_start_schedule_after_; |
| int64 copy_done_schedule_before_; |
| HloInstruction* copy_start_; |
| HloInstruction* copy_done_; |
| }; |
| |
| using AllocationSequence = std::list<std::unique_ptr<Allocation>>; |
| using AllocationMap = |
| absl::flat_hash_map<const HloValue*, AllocationSequence>; |
| |
| // Runs the MemorySpaceAssignment pass. alternate_memory_space is the |
| // architecture-specific integer value that describes the alternate memory. |
| // max_size_in_bytes is the maximum size of the alternate memory. |
| // If a buffer_interval_compare is provided, we sort the buffers using that |
| // (otherwise, we use GlobalDecreasingSizeBestFitHeap::kSpatial). |
| // prefetch_interval_picker determines how early and how late can prefetches |
| // occur. alternate_memory_space_alignment_in_bytes is the alignment required |
| // in the alternate memory space, size_fn is the size function for buffer |
| // values, and is_allowed_in_alternate_mem can be used to prevent certain |
| // HloValues (e.g., based on the opcode) to be placed on the alternate memory. |
| // max_outstanding_async_copies specifies the upper bound for number of |
| // outstanding asynchronous copies, -1 for unlimited. |
| static StatusOr<std::unique_ptr<PresetAssignments>> Run( |
| HloModule* module, int64 alternate_memory_space, int64 max_size_in_bytes, |
| absl::optional<BufferIntervalCompare> buffer_interval_compare, |
| PrefetchIntervalPicker* prefetch_interval_picker, |
| int64 alternate_memory_space_alignment_in_bytes, |
| BufferValue::SizeFunction size_fn, |
| std::function<bool(const HloValue&)> is_allowed_in_alternate_mem, |
| int64 max_outstanding_async_copies = -1); |
| |
| // Returns the maximum number of outstanding asynchronous copies in the |
| // module. |
| static int64 CountMaximumOutstandingAsyncCopies(const HloModule& module); |
| |
| static BufferIntervalCompare GetMemoryBoundednessBufferIntervalCompare( |
| const MemorySpaceAssignmentCostAnalysis& cost_analysis); |
| |
| private: |
| MemorySpaceAssignment(HloModule* module, int64 alternate_memory_space) |
| : module_(module), |
| alternate_memory_space_(alternate_memory_space), |
| preset_assignments_(absl::make_unique<PresetAssignments>()) {} |
| |
| // Process calls Process methods of the allocations after the allocations have |
| // been finalized. |
| Status Process(); |
| |
| // Process() might have altered the computation graph by inserting kTuple and |
| // kGetTupleElement instructions. SimplifyGraph performs a simple DCE and |
| // tuple simplification operation (e.g., given GetTupleElement(Tuple(a, b), |
| // 1), simply forwards b). Runs to fixed point. |
| Status SimplifyGraph(); |
| |
| // FixSchedule inserts asynchronous copies in the schedule. |
| Status FixSchedule(); |
| |
| // Insert an instruction to the schedule, and make sure its dependencies |
| // (operands) are already in the schedule. If not, insert these operands |
| // before the instruction. |
| void EnsureInstructionAndOperandsInserted( |
| HloInstruction* new_instruction, HloInstructionSequence* new_sequence, |
| absl::flat_hash_set<HloInstruction*>* inserted_instructions) const; |
| |
| // Schedules asynchronous copies and ensures that the CopyStarts and their |
| // corresponding CopyDones follow the same order. |
| void ScheduleAsynchronousCopies(); |
| |
| // Add the position to the pending positions that will be colored as alternate |
| // memory. |
| void AddPositionInAlternateMemorySpace(HloPosition position); |
| |
| HloModule* module_; |
| int64 alternate_memory_space_; |
| std::unique_ptr<HloLiveRange> hlo_live_range_; |
| AllocationMap allocation_map_; |
| std::unique_ptr<PresetAssignments> preset_assignments_; |
| |
| // These maps hold vectors of new instructions that need to be scheduled after |
| // (or before) the instruction index in the key. FixSchedule uses these maps |
| // to modify and fix the schedule. |
| absl::flat_hash_map<int64, std::vector<HloInstruction*>> schedule_after_; |
| absl::flat_hash_map<int64, std::vector<HloInstruction*>> schedule_before_; |
| std::vector<HloPosition> pending_positions_in_alternate_mem_; |
| }; |
| |
| // This struct contains mandatory memory assignments at a given time. E.g., an |
| // input's required memory assignment time would correspond to the definition |
| // time of the parameter instruction, and an output's time would correspnd to |
| // the time of last use. |
| struct RequiredMemoryAssignment { |
| MemorySpaceAssignment::MemorySpace memory_space; |
| int64 time; |
| }; |
| |
| // This class inherits from GlobalDecreasingSizeBestFitHeap with a notion of |
| // maximum size. |
| class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { |
| public: |
| using IsAllowedInAlternateMemoryFunction = |
| std::function<bool(const HloValue&)>; |
| using MemorySpace = MemorySpaceAssignment::MemorySpace; |
| |
| AlternateMemoryBestFitHeap( |
| MemorySpaceAssignment::AllocationMap* allocation_map, |
| int64 max_size_in_bytes, |
| absl::optional<BufferIntervalCompare> buffer_interval_compare, |
| PrefetchIntervalPicker* prefetch_interval_picker, |
| const HloAliasAnalysis& alias_analysis, |
| const HloLiveRange& hlo_live_range, int64 alignment, |
| IsAllowedInAlternateMemoryFunction is_allowed_in_alternate_mem, |
| int64 max_outstanding_async_copies) |
| : GlobalDecreasingSizeBestFitHeap(alignment), |
| allocation_map_(allocation_map), |
| max_size_in_bytes_(max_size_in_bytes), |
| prefetch_interval_picker_(prefetch_interval_picker), |
| alias_analysis_(alias_analysis), |
| hlo_live_range_(hlo_live_range), |
| is_allowed_in_alternate_mem_(is_allowed_in_alternate_mem), |
| max_outstanding_async_copies_(max_outstanding_async_copies) { |
| // Override buffer interval compare if provided. |
| if (buffer_interval_compare) { |
| buffer_interval_compare_ = *buffer_interval_compare; |
| } |
| } |
| |
| HeapSimulator::Result Finish() override; |
| |
| private: |
| // Finds an allocation for the given interval. Internally, it will attempt to |
| // find a suitable chunk candidate within the heap size and prefetch interval |
| // limits, and append the new allocation(s) to allocations. The new |
| // allocations can be in default or alternate memory spaces, or can be |
| // prefetches or evictions. Returns true if successful. |
| bool FindAllocation(int64 start_time, int64 end_time, int64 last_use_time, |
| HloPosition defining_position, HloUse use, |
| const HloValue* buffer, int64 size, |
| MemorySpaceAssignment::AllocationSequence* allocations); |
| |
| // Try allocating in alternate memory without any copies. Returns true if |
| // successful. |
| bool TryAllocatingInAlternateMemoryNoCopy( |
| int64 start_time, int64 end_time, int64 last_use_time, |
| HloPosition defining_position, HloUse use, |
| BufferInterval alternate_mem_interval, |
| HloInstruction* non_bitcast_operand, |
| MemorySpaceAssignment::AllocationSequence* allocations); |
| |
| // Adds input and outputs as required assignments. |
| void AddInputAndOutputRequiredAssignments(); |
| |
| // Given a buffer interval, returns the colocated intervals. Unlike the |
| // similar GlobalDecreasingSizeBestFitHeap::GetTransitiveColocations, it |
| // returns the colocated intervals sorted by scheduled time. |
| std::vector<const BufferInterval*> GetSortedColocatedIntervals( |
| const BufferInterval& interval) const; |
| |
| // Since the allocations are recorded to the AllocationMap, we don't maintain |
| // result_ in GlobalDecreasingSizeBestFitHeap. Override AddToChunkMap to avoid |
| // unnecessarily adding the chunk to the chunk map. |
| void AddToChunkMap(const HloValue* buffer, Chunk chunk) override {} |
| |
| // Returns true if the addition of an asynchronous copy in the given time |
| // interval would violate the maximum number of asynchronous copies. |
| bool ViolatesMaximumOutstandingAsyncCopies(int64 start_time, |
| int64 end_time) const; |
| |
| // Adds an asynchronous copy to the allocations. |
| void AddAsyncCopy(const MemorySpaceAssignment::Allocation& prev_allocation, |
| MemorySpace memory_space, Chunk chunk, int64 start_time, |
| int64 end_time, |
| MemorySpaceAssignment::AllocationSequence* allocations); |
| |
| // These methods are used for delaying committing the chunk candidate until |
| // the entire live range of the buffer has been considered. |
| void AddToPendingChunks(const BufferInterval& buffer_interval, |
| const ChunkCandidate& chunk_candidate); |
| void CommitPendingChunks(); |
| |
| MemorySpaceAssignment::AllocationMap* allocation_map_; |
| int64 max_size_in_bytes_; |
| PrefetchIntervalPicker* prefetch_interval_picker_; |
| const HloAliasAnalysis& alias_analysis_; |
| const HloLiveRange& hlo_live_range_; |
| IsAllowedInAlternateMemoryFunction is_allowed_in_alternate_mem_; |
| // We use a interval tree to keep track of the number of outstanding |
| // asynchronous copies. |
| BufferIntervalTree async_copy_interval_tree_; |
| int64 max_outstanding_async_copies_; |
| std::vector<std::pair<BufferInterval, ChunkCandidate>> pending_chunks_; |
| std::vector<std::pair<int64, int64>> pending_async_copies_; |
| // This map contains required memory assignments for HloValues (e.g., input |
| // and outputs). |
| absl::flat_hash_map<const HloValue*, std::vector<RequiredMemoryAssignment>> |
| required_assignments_; |
| }; |
| |
| } // namespace xla |
| |
| #endif // TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_H_ |