// Schema definitions used by the hierarchical softmax (HSM) operator.
syntax = "proto2";

package caffe2;

// Hierarchical Softmax protobuf convention:
// The HSM operator requires the user to supply a hierarchy of vocabulary words
// in the form of a tree. This tree is expressed using the proto format.
// TreeProto points to the root NodeProto, which can recursively contain child
// NodeProtos (internal nodes) or word_ids (leaf nodes).

// The aforementioned TreeProto is internally translated, using
// hsm_util.create_hierarchy(tree_proto), into a list of word_ids, each tagged
// with the list of NodeProtos that lie on the path from the root to that
// word_id.
// Specifically, HierarchyProto contains a list of PathProtos. Each PathProto
// belongs to a word_id and contains a list of PathNodeProtos. Each
// PathNodeProto records the number of children the node has (length), the
// index of the child node that lies on the path from the root to the word_id
// (target), and a cumulative sum of children nodes (index; this acts as the
// offset into the weight parameter matrix).

// A node of the user-supplied vocabulary tree. Each node links to leaf nodes
// (word_ids) or to further non-terminal nodes (children). The schema itself
// does not prevent a node from carrying both kinds of links.
message NodeProto {
  // Links to non-terminal children nodes. The nesting is recursive; protobuf
  // parsers enforce a recursion-depth limit, so extremely deep trees may be
  // rejected at parse time.
  repeated NodeProto children = 1;

  // Links to terminal (leaf) nodes, expressed as word ids.
  repeated int32 word_ids = 2;

  // NOTE(review): not documented in this file; presumably an offset assigned
  // to this node when the tree is processed -- confirm against hsm_util.
  optional int32 offset = 3;

  // NOTE(review): presumably a label identifying this node -- confirm.
  optional string name = 4;

  // NOTE(review): not documented in this file; presumably scores associated
  // with this node's children and/or word_ids -- confirm ordering and meaning
  // with the producer of this message.
  repeated float scores = 5;
}

// User-facing format for handing a word hierarchy to the hierarchical softmax
// operator. A TreeProto simply points to the root node of the tree.
message TreeProto {
  // Root of the vocabulary tree; all other nodes are reached through it.
  optional NodeProto root_node = 1;
}

// Internal format that represents, for each word in the vocabulary, its path
// through the tree hierarchy.
message HierarchyProto {
  // NOTE(review): not documented in this file; presumably a total size derived
  // from the tree (e.g. of the parameter matrix) -- confirm against the code
  // that builds this message.
  optional int32 size = 1;

  // One path per word.
  repeated PathProto paths = 2;
}

// The path of a single word: an array of the nodes visited when walking from
// the root of the tree to the leaf (which is the word itself).
message PathProto {
  // Id of the word this path belongs to.
  optional int32 word_id = 1;

  // Nodes on the path from the root down to the word.
  repeated PathNodeProto path_nodes = 2;
}

// One node on the path from the root node all the way down to a word (leaf).
message PathNodeProto {
  // Parameter matrix offset for this node (a cumulative sum of children
  // nodes).
  optional int32 index = 1;

  // Number of children this node has.
  optional int32 length = 2;

  // Index of the next node in the path, i.e. which child of this node lies on
  // the way from the root to the word.
  optional int32 target = 3;
}