import numpy as np
import tensorflow as tf
from tensorflow.keras .models import Sequential
from tensorflow.keras .layers import Embedding, SimpleRNN, Dense
# --- Corpus and character vocabulary ---------------------------------------
text = "This is a sample text for language modeling using RNN."

# Sorted unique characters give a stable, reproducible id assignment.
chars = sorted(set(text))
char_to_index = {c: i for i, c in enumerate(chars)}
index_to_char = dict(enumerate(chars))

# Encode the whole corpus as integer character ids.
text_indices = [char_to_index[c] for c in text]

# --- Training pairs: each seq_length-wide window predicts the next id ------
seq_length = 20
window_starts = range(len(text_indices) - seq_length)
sequences = [text_indices[s : s + seq_length] for s in window_starts]
next_char = [text_indices[s + seq_length] for s in window_starts]

X = np.array(sequences)  # shape: (num_windows, seq_length)
y = np.array(next_char)  # shape: (num_windows,)
# --- Model: embedding -> simple RNN -> softmax over the vocabulary ---------
# (A stray literal `17` — a text-extraction artifact — was removed here.)
model = Sequential(
    [
        # Map each character id to a dense 50-dimensional vector.
        # `input_length=` was dropped: it is deprecated (removed in Keras 3)
        # and the sequence length is inferred from the input anyway.
        Embedding(input_dim=len(chars), output_dim=50),
        # Single recurrent layer; only the final hidden state is needed
        # because we predict just one next character per window.
        SimpleRNN(100, return_sequences=False),
        # Probability distribution over the next character id.
        Dense(len(chars), activation="softmax"),
    ]
)
# Integer class targets (y holds ids, not one-hot) -> sparse CE loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
model.fit(X, y, batch_size=64, epochs=50)
# --- Greedy character-by-character text generation -------------------------
seed_text = "This is a sample te"
generated_text = seed_text
num_chars_to_generate = 100

for _ in range(num_chars_to_generate):
    # Re-encode the current sliding window of seed characters as ids.
    seed_indices = [char_to_index[char] for char in seed_text]

    # Left-pad short seeds so the model always sees seq_length steps.
    # NOTE(review): id 0 is the space character here, not a dedicated
    # padding token — acceptable for this toy corpus, but worth confirming.
    if len(seed_indices) < seq_length:
        seed_indices = [0] * (seq_length - len(seed_indices)) + seed_indices

    # Batch of one window; verbose=0 suppresses the per-call progress bar
    # that would otherwise print once for every generated character.
    window = np.array(seed_indices).reshape(1, -1)
    next_index = int(model.predict(window, verbose=0).argmax())

    # Renamed from `next_char`: the original reused (and clobbered) the
    # name of the training-label list built earlier in the script.
    predicted_char = index_to_char[next_index]
    generated_text += predicted_char

    # Slide the window one character forward.
    seed_text = seed_text[1:] + predicted_char

print(generated_text)
aW1wb3J0IG51bXB5IGFzIG5wCmltcG9ydCB0ZW5zb3JmbG93IGFzIHRmCmZyb20gdGVuc29yZmxvdy5rZXJhcy5tb2RlbHMgaW1wb3J0IFNlcXVlbnRpYWwKZnJvbSB0ZW5zb3JmbG93LmtlcmFzLmxheWVycyBpbXBvcnQgRW1iZWRkaW5nLCBTaW1wbGVSTk4sIERlbnNlCnRleHQgPSAiVGhpcyBpcyBhIHNhbXBsZSB0ZXh0IGZvciBsYW5ndWFnZSBtb2RlbGluZyB1c2luZyBSTk4uIgpjaGFycyA9IHNvcnRlZChzZXQodGV4dCkpCmNoYXJfdG9faW5kZXggPSB7Y2hhcjogaW5kZXggZm9yIGluZGV4LCBjaGFyIGluIGVudW1lcmF0ZShjaGFycyl9CmluZGV4X3RvX2NoYXIgPSB7aW5kZXg6IGNoYXIgZm9yIGluZGV4LCBjaGFyIGluIGVudW1lcmF0ZShjaGFycyl9CnRleHRfaW5kaWNlcyA9IFtjaGFyX3RvX2luZGV4W2NoYXJdIGZvciBjaGFyIGluIHRleHRdCnNlcV9sZW5ndGggPSAyMApzZXF1ZW5jZXMgPSBbXQpuZXh0X2NoYXIgPSBbXQpmb3IgaSBpbiByYW5nZSgwLCBsZW4odGV4dF9pbmRpY2VzKSAtIHNlcV9sZW5ndGgpOgogc2VxdWVuY2VzLmFwcGVuZCh0ZXh0X2luZGljZXNbaSA6IGkgKyBzZXFfbGVuZ3RoXSkKIG5leHRfY2hhci5hcHBlbmQodGV4dF9pbmRpY2VzW2kgKyBzZXFfbGVuZ3RoXSkKWCA9IG5wLmFycmF5KHNlcXVlbmNlcykKeSA9IG5wLmFycmF5KG5leHRfY2hhcikKMTcKbW9kZWwgPSBTZXF1ZW50aWFsKFtFbWJlZGRpbmcoaW5wdXRfZGltPWxlbihjaGFycyksIG91dHB1dF9kaW09NTAsIGlucHV0X2xlbmd0aD1zZXFfbGVuZ3RoKSxTaW1wbGVSTk4oMTAwLCByZXR1cm5fc2VxdWVuY2VzPUZhbHNlKSxEZW5zZShsZW4oY2hhcnMpLCBhY3RpdmF0aW9uPSJzb2Z0bWF4IildKQptb2RlbC5jb21waWxlKGxvc3M9InNwYXJzZV9jYXRlZ29yaWNhbF9jcm9zc2VudHJvcHkiLCBvcHRpbWl6ZXI9ImFkYW0iKQptb2RlbC5maXQoWCwgeSwgYmF0Y2hfc2l6ZT02NCwgZXBvY2hzPTUwKQpzZWVkX3RleHQgPSAiVGhpcyBpcyBhIHNhbXBsZSB0ZSIKZ2VuZXJhdGVkX3RleHQgPSBzZWVkX3RleHQKbnVtX2NoYXJzX3RvX2dlbmVyYXRlID0gMTAwCmZvciBfIGluIHJhbmdlKG51bV9jaGFyc190b19nZW5lcmF0ZSk6CiBzZWVkX2luZGljZXMgPSBbY2hhcl90b19pbmRleFtjaGFyXSBmb3IgY2hhciBpbiBzZWVkX3RleHRdCgogaWYgbGVuKHNlZWRfaW5kaWNlcykgPCBzZXFfbGVuZ3RoOgogICAgIGRpZmYgPSBzZXFfbGVuZ3RoIC0gbGVuKHNlZWRfaW5kaWNlcykKICAgICBzZWVkX2luZGljZXMgPSBbMF0gKiBkaWZmICsgc2VlZF9pbmRpY2VzCgogc2VlZF9pbmRpY2VzID0gbnAuYXJyYXkoc2VlZF9pbmRpY2VzKS5yZXNoYXBlKDEsIC0xKQogbmV4dF9pbmRleCA9IG1vZGVsLnByZWRpY3Qoc2VlZF9pbmRpY2VzKS5hcmdtYXgoKQogbmV4dF9jaGFyID0gaW5kZXhfdG9fY2hhcltuZXh0X2luZGV4XQogZ2VuZXJhdGVkX3RleHQgKz0gbmV4dF9jaGFyCiBzZWVkX3RleHQgPSBzZWVkX3RleHRbMTpdICsgbmV4dF9jaGFyCnByaW50KGdl
bmVyYXRlZF90ZXh0KQ==