Transformer Auto-Regression Experiment

This trains a simple transformer, introduced in Attention Is All You Need, on an NLP auto-regression task (with the Tiny Shakespeare dataset).

```python
import torch
from torch import nn

from labml import experiment
from labml.configs import option
from labml_nn.experiments.nlp_autoregression import NLPAutoRegressionConfigs
from labml_nn.transformers import TransformerConfigs, Encoder
from labml_nn.transformers.utils import subsequent_mask
```

Auto-Regressive model

```python
class AutoregressiveTransformer(nn.Module):
    def __init__(self, encoder: Encoder, src_embed: nn.Module, generator: nn.Module):
        super().__init__()
        self.src_embed = src_embed
        self.encoder = encoder
        self.generator = generator

        # The mask will be initialized on the first call
        self.mask = None

    def forward(self, x: torch.Tensor):
        # Create the subsequent mask if it is not initialized,
        # or if its size differs from the current sequence length
        if self.mask is None or self.mask.size(0) != len(x):
            # The subsequent mask stops tokens from attending to future tokens
            self.mask = subsequent_mask(len(x)).to(x.device)

        # Get the token embeddings with positional encodings
        x = self.src_embed(x)
        # Transformer encoder
        x = self.encoder(x, self.mask)
        # Get logits
        x = self.generator(x)

        # Return results (the second value is for state,
        # since the same trainer is also used with RNNs)
        return x, None
```
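The cached mask is a causal mask: position i may attend only to positions at or before i. As a rough illustration of what `subsequent_mask` produces, here is a minimal sketch built with `torch.tril`; `subsequent_mask_sketch` is a hypothetical stand-in, and the actual labml helper may differ in details such as an extra trailing dimension.

```python
import torch

def subsequent_mask_sketch(seq_len: int) -> torch.Tensor:
    # Lower-triangular boolean matrix: row i is True only for columns <= i
    return torch.tril(torch.ones(seq_len, seq_len, dtype=torch.bool))

print(subsequent_mask_sketch(3))
# tensor([[ True, False, False],
#         [ True,  True, False],
#         [ True,  True,  True]])
```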


Configurations

This inherits from NLPAutoRegressionConfigs

```python
class Configs(NLPAutoRegressionConfigs):
    # GPT model
    model: AutoregressiveTransformer
    # Transformer
    transformer: TransformerConfigs
```

Transformer configurations

```python
@option(Configs.transformer, 'Transformer')
def _transformer_configs(c: Configs):
    # We use our configurable transformer implementation
    conf = TransformerConfigs()
    # Set the vocabulary sizes for embeddings and generating logits
    conf.n_src_vocab = c.n_tokens
    conf.n_tgt_vocab = c.n_tokens
    # Set the model size
    conf.d_model = c.d_model

    return conf
```

Create the GPT model and move it to the configured device

```python
@option(Configs.model)
def _model(c: Configs):
    m = AutoregressiveTransformer(c.transformer.encoder,
                                  c.transformer.src_embed,
                                  c.transformer.generator).to(c.device)

    return m
```
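To sanity-check the model wiring without the configs machinery, a minimal sketch with stand-in modules can be used; `DummyEncoder`, `n_tokens = 65`, and the sizes below are illustrative assumptions, not part of the experiment:

```python
import torch
from torch import nn

d_model, n_tokens = 256, 65  # assumed sizes for illustration

class DummyEncoder(nn.Module):
    # Identity stand-in for the real transformer encoder
    def forward(self, x: torch.Tensor, mask: torch.Tensor):
        return x

model = AutoregressiveTransformer(
    encoder=DummyEncoder(),
    src_embed=nn.Embedding(n_tokens, d_model),
    generator=nn.Linear(d_model, n_tokens),
)

x = torch.randint(0, n_tokens, (32, 4))  # [seq_len, batch_size]
logits, _ = model(x)
print(logits.shape)  # torch.Size([32, 4, 65])
```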

```python
def main():
    # Create experiment
    experiment.create(name="transformer")
    # Create configs
    conf = Configs()
    # Override configurations
    experiment.configs(conf, {
        # Use character-level tokenizer
        'tokenizer': 'character',
        # Prompt separator is blank
        'prompt_separator': '',
        # Starting prompt for sampling
        'prompt': 'It is ',
        # Use Tiny Shakespeare dataset
        'text': 'tiny_shakespeare',

        # Use a context size of 512
        'seq_len': 512,
        # Train for 32 epochs
        'epochs': 32,
        # Batch size of 16
        'batch_size': 16,
        # Switch between training and validation 10 times per epoch
        'inner_iterations': 10,

        # Model size
        'd_model': 256,
        'transformer.n_heads': 16,
        'transformer.ffn.d_ff': 1024,

        # Use the Noam optimizer (see the schedule sketch below)
        'optimizer.optimizer': 'Noam',
        'optimizer.learning_rate': 1.,
    })
```
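The Noam optimizer follows the learning-rate schedule from Attention Is All You Need: the rate grows linearly over a warm-up period, then decays with the inverse square root of the step number, scaled by d_model^-0.5. A minimal sketch of that rule follows; the warm-up value is an assumed illustration, and labml's optimizer configs apply an equivalent rule internally (the `optimizer.learning_rate` of 1.0 above acts as a plain multiplier in this sketch).

```python
def noam_lr(step: int, d_model: int = 256, warmup: int = 4000, factor: float = 1.0) -> float:
    # Linear warm-up followed by inverse-square-root decay
    step = max(step, 1)
    return factor * d_model ** (-0.5) * min(step ** (-0.5), step * warmup ** (-1.5))

# The rate peaks around `warmup` steps and decays afterwards
print(noam_lr(1000), noam_lr(4000), noam_lr(16000))
```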

```python
    # Set models for saving and loading
    experiment.add_pytorch_models({'model': conf.model})

    # Start the experiment
    with experiment.start():
        # Run training
        conf.run()


if __name__ == '__main__':
    main()
```
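Assuming the module path mirrors the docs path (labml_nn/transformers/basic/autoregressive_experiment.py, which the source does not state explicitly), the experiment can be launched with `python -m labml_nn.transformers.basic.autoregressive_experiment`.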
