Back to Annotated Deep Learning Paper Implementations

Train Feedback Transformer

docs/transformers/feedback/experiment.html

latest4.1 KB
Original Source

home / transformers / feedback

View code on Github

#

Train Feedback Transformer

This trains a feedback transformer model for auto-regression. You can pick the original feedback transformer or the new version where the keys and values are precalculated.

Here's a Colab notebook for training a feedback transformer on Tiny Shakespeare dataset.

18importtorch19fromlabmlimportexperiment20fromlabml.configsimportoption21fromlabml.utils.pytorchimportget\_modules22fromlabml\_nn.experiments.nlp\_autoregressionimportNLPAutoRegressionConfigs23fromtorchimportnn

#

Auto regressive model

class AutoregressiveModel(nn.Module):
    """
    ## Auto-regressive model

    Wraps a transformer module between a token-embedding layer and a
    final linear projection that produces next-token logits.
    """

    def __init__(self, n_vocab: int, d_model: int, transformer: nn.Module):
        """
        * `n_vocab` is the vocabulary size
        * `d_model` is the embedding size
        * `transformer` is the transformer module to run between
          embedding and the output projection
        """
        super().__init__()
        # Token embedding module
        self.src_embed = nn.Embedding(n_vocab, d_model)
        # The transformer (either variant of the feedback transformer)
        self.transformer = transformer
        # Projects transformer outputs to logits over the vocabulary
        self.generator = nn.Linear(d_model, n_vocab)

    def forward(self, x: torch.Tensor):
        # Embed the tokens
        x = self.src_embed(x)
        # Run it through the transformer
        res = self.transformer(x)
        # Generate logits of the next token; the second value (`None`)
        # is the state slot expected by the training loop — unused here
        return self.generator(res), None

#

Configurations

The default configs can and will be overridden when we start the experiment

class Configs(NLPAutoRegressionConfigs):
    """
    ## Configurations

    The default configs can and will be overridden when we start the
    experiment.
    """

    # Model to train; filled in by one of the `@option(Configs.model)`
    # factory functions below
    model: AutoregressiveModel

    # Transformer hyper-parameters
    d_model: int = 512
    heads: int = 8
    dropout: float = 0.0
    d_ff: int = 2048
    n_layers: int = 6

#

Create original feedback transformer.

@option(Configs.model)
def feedback_transformer(c: Configs):
    """
    ### Create the original feedback transformer

    Keys and values are recomputed from the memory at each step.
    """
    # Import locally so the module is only loaded when this option is used
    from labml_nn.transformers.feedback import FeedbackTransformer, FeedbackTransformerLayer, \
        FeedbackAttention, FeedForward

    layer = FeedbackTransformerLayer(d_model=c.d_model,
                                     attn=FeedbackAttention(c.heads, c.d_model, c.dropout),
                                     feed_forward=FeedForward(c.d_model, c.d_ff, c.dropout),
                                     dropout_prob=c.dropout)
    transformer = FeedbackTransformer(layer, c.n_layers)
    return AutoregressiveModel(c.n_tokens, c.d_model, transformer).to(c.device)

#

Create updated feedback transformer, with precalculated keys and values.

@option(Configs.model)
def feedback_transformer_kv(c: Configs):
    """
    ### Create the updated feedback transformer

    Keys and values are precalculated (`is_kv_precomputed=True`).
    """
    # Import locally so the module is only loaded when this option is used
    from labml_nn.transformers.feedback import FeedbackTransformerKV, FeedbackTransformerLayer, \
        FeedbackAttention, FeedForward

    layer = FeedbackTransformerLayer(d_model=c.d_model,
                                     attn=FeedbackAttention(c.heads, c.d_model, c.dropout,
                                                            is_kv_precomputed=True),
                                     feed_forward=FeedForward(c.d_model, c.d_ff, c.dropout),
                                     dropout_prob=c.dropout)
    transformer = FeedbackTransformerKV(layer, c.n_layers, c.d_model, c.heads)
    return AutoregressiveModel(c.n_tokens, c.d_model, transformer).to(c.device)

#

def main():
    """Set up and run the feedback transformer training experiment."""
    # Create experiment
    experiment.create(name="feedback_transformer")
    # Create configs
    conf = Configs()
    # Load configurations, overriding the defaults
    experiment.configs(conf,
                       # A dictionary of configurations to override
                       {'tokenizer': 'character',
                        'text': 'tiny_shakespeare',
                        'optimizer.learning_rate': 1.0,
                        'optimizer.optimizer': 'Noam',
                        'prompt': 'It is',
                        'prompt_separator': '',
                        # Use 'feedback_transformer' for the original
                        # feedback transformer
                        'model': 'feedback_transformer_kv',

                        'train_loader': 'shuffled_train_loader',
                        'valid_loader': 'shuffled_valid_loader',

                        'seq_len': 128,
                        'epochs': 128,
                        'batch_size': 64,
                        'inner_iterations': 25})

    # Set models for saving and loading
    experiment.add_pytorch_models(get_modules(conf))

    # Start the experiment
    with experiment.start():
        # Run the training loop
        conf.run()


if __name__ == '__main__':
    main()

labml.ai