import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
# --- Data preparation ---------------------------------------------------
# Toy corpus for the character-level language model.
text = "This is a sample text for language modeling using RNN."

# Character vocabulary and the two lookup tables (char <-> integer id).
chars = sorted(set(text))
char_to_index = {char: index for index, char in enumerate(chars)}
index_to_char = {index: char for index, char in enumerate(chars)}

# Encode the whole corpus as a list of integer ids.
text_indices = [char_to_index[char] for char in text]

# Slide a window of `seq_length` characters over the corpus: each window is
# one training sample, and the character immediately after it is the target.
seq_length = 20
sequences = []
next_char = []
for i in range(len(text_indices) - seq_length):
    sequences.append(text_indices[i:i + seq_length])
    next_char.append(text_indices[i + seq_length])

# X: (num_samples, seq_length) integer ids; y: (num_samples,) target ids.
X, y = np.array(sequences), np.array(next_char)
# --- Model definition & training -----------------------------------------
# Embedding -> single SimpleRNN -> softmax over the character vocabulary.
model = Sequential()
model.add(Embedding(input_dim=len(chars), output_dim=50, input_length=seq_length))
model.add(SimpleRNN(100, return_sequences=False))
model.add(Dense(len(chars), activation="softmax"))

# Targets are integer class ids, hence the sparse cross-entropy loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
model.fit(X, y, batch_size=64, epochs=20)
# --- Greedy character-by-character generation -----------------------------
seed_text = "This is a sample te"
generated_text = seed_text
num_chars_to_generate = 100

for _ in range(num_chars_to_generate):
    # Encode the current seed window as integer ids.
    seed_indices = [char_to_index[char] for char in seed_text]
    if len(seed_indices) < seq_length:
        # Left-pad short seeds up to the model's fixed input length.
        # NOTE(review): pad id 0 aliases the first vocabulary character
        # (the space, given sorted(set(text))) — a dedicated PAD token
        # would be cleaner; confirm this is intended. The 19-char seed
        # stays 19 chars after each slide, so this pads every iteration.
        diff = seq_length - len(seed_indices)
        seed_indices = [0] * diff + seed_indices
    seed_indices = np.array(seed_indices).reshape(1, -1)
    # verbose=0: predict() would otherwise print a progress bar for every
    # single generated character (100 lines of noise in this loop).
    next_index = model.predict(seed_indices, verbose=0).argmax()
    next_char = index_to_char[next_index]
    generated_text += next_char
    # Slide the window: drop the oldest character, append the new one.
    seed_text = seed_text[1:] + next_char

print(generated_text)
# NOTE: removed a trailing base64 blob that decoded to a byte-identical
# duplicate of this script (extraction artifact; it was a syntax error here).