...111 RecurrentTransformerEncoder.112 Inheriting classes should implement the following:113 - _get_attention_builder()114 - _get_attention_layer_class()115 - _get_encoder_class()116 - _get_encoder_layer_class()117 """118 def __init__(self):119 super(BaseTransformerEncoderBuilder, self).__init__()120 self._attention_builder = self._get_attention_builder()121 self._attention_type = "full"122 def _get_attention_builder(self):123 """Return an instance of the appropriate attention builder."""124 raise NotImplementedError()125 def _get_attention_layer_class(self):126 """Return the class for the layer that projects queries keys and127 values."""128 raise NotImplementedError()129 def _get_encoder_class(self):130 """Return the class for the transformer encoder."""131 raise NotImplementedError()132 def _get_encoder_layer_class(self):133 """Return the class for the transformer encoder layer."""134 raise NotImplementedError()135 @property136 def attention(self):137 """The attention builder instance."""138 return self._attention_builder139 @property140 def attention_type(self):141 """The attention implementation chosen."""142 return self._attention_type143 @attention_type.setter144 def attention_type(self, val):145 if not self._attention_builder.validate_attention_type(val):146 raise ValueError(("{!r} is not an available attention "147 "type").format(val))148 self._attention_type = val149 def __setattr__(self, key, val):150 # "protected" attributes are settable (probably from withing the class)151 if key[0] == "_":152 return super().__setattr__(key, val)153 # Existing attributes are settable but they might also be attention154 # parameters so try that as well155 fail_on_exception = True156 if hasattr(self, key):157 super().__setattr__(key, val)158 fail_on_exception = False159 # Non-existing "public" attributes may be attention parameters160 try:161 setattr(self._attention_builder, key, val)162 except:163 if fail_on_exception:164 raise165 def get(self):166 """Build the transformer and return it."""167 # Set the event dispatcher to the attention builder168 self.attention.event_dispatcher = self.event_dispatcher169 # Extract into local variables the classes to be used170 Encoder = self._get_encoder_class()171 EncoderLayer = self._get_encoder_layer_class()172 Attention = self._get_attention_layer_class()173 model_dimensions = self.value_dimensions*self.n_heads174 return Encoder(175 [176 EncoderLayer(177 Attention(178 self.attention.get(self.attention_type),179 model_dimensions,180 self.n_heads,181 d_keys=self.query_dimensions,182 d_values=self.value_dimensions,183 event_dispatcher=self.event_dispatcher184 ),185 model_dimensions,186 self.feed_forward_dimensions,187 self.dropout,188 self.activation,189 event_dispatcher=self.event_dispatcher190 )191 for _ in range(self.n_layers)192 ],193 (LayerNorm(model_dimensions) if self.final_normalization else None),194 event_dispatcher=self.event_dispatcher195 )196class TransformerEncoderBuilder(BaseTransformerEncoderBuilder):197 """Build a batch transformer encoder for training or processing of198 sequences all elements at a time.199 Example usage:200 builder = TransformerEncoderBuilder()201 builder.n_layers = 12202 builder.n_heads = 8203 builder.feed_forward_dimensions = 1024204 builder.query_dimensions = 64205 builder.value_dimensions = 64206 builder.dropout = 0.1207 builder.attention_dropout = 0.1208 builder.attention_type = "linear"209 transformer = builder.get()210 """211 def _get_attention_builder(self):212 """Return an instance of the appropriate attention builder."""213 return AttentionBuilder()214 def _get_attention_layer_class(self):215 """Return the class for the layer that projects queries keys and216 values."""217 return AttentionLayer218 def _get_encoder_class(self):219 """Return the class for the transformer encoder."""220 return TransformerEncoder221 def _get_encoder_layer_class(self):222 """Return the class for the transformer encoder layer."""223 return TransformerEncoderLayer224class RecurrentEncoderBuilder(BaseTransformerEncoderBuilder):225 """Build a transformer encoder for autoregressive processing of sequences.226 Example usage:227 builder = RecurrentEncoderBuilder()228 builder.n_layers = 12229 builder.n_heads = 8230 builder.feed_forward_dimensions = 1024231 builder.query_dimensions = 64232 builder.value_dimensions = 64233 builder.dropout = 0.1234 builder.attention_dropout = 0.1235 builder.attention_type = "linear"236 transformer = builder.get()237 """238 def _get_attention_builder(self):239 """Return an attention builder for recurrent attention."""240 return RecurrentAttentionBuilder()241 def _get_attention_layer_class(self):242 """Return the class for the recurrent layer that projects queries keys243 and values."""244 return RecurrentAttentionLayer245 def _get_encoder_class(self):246 """Return the class for the recurrent transformer encoder."""247 return RecurrentTransformerEncoder248 def _get_encoder_layer_class(self):249 """Return the class for the recurrent transformer encoder layer."""250 return RecurrentTransformerEncoderLayer251class BaseTransformerDecoderBuilder(BaseTransformerBuilder):252 """Similar to BaseTransformerEncoderBuilder implement the logic of253 building the transformer decoder without defining concrete layers.254 Inheriting classes should implement the following:255 - _get_self_attention_builder() and _get_cross_attention_builder()256 - _get_self_attention_layer_class() and _get_cross_attention_layer_class()257 - _get_decoder_class()258 - _get_decoder_layer_class()259 """...

