import numpy as np
import tensorflow as tf
from tensorflow.keras .models import Sequential
from tensorflow.keras .layers import Embedding, SimpleRNN, Dense
# Toy corpus for the character-level language model.
text = "This is a sample text for language modeling using RNN."

# Vocabulary: every distinct character, sorted so that the id assigned to each
# character is deterministic from run to run.
chars = sorted(set(text))
char_to_index = {char: index for index, char in enumerate(chars)}
index_to_char = dict(enumerate(chars))

# The whole corpus encoded as integer ids.
text_indices = [char_to_index[char] for char in text]

# Slide a fixed-size window over the corpus: each run of seq_length ids is one
# training input, and the id immediately following it is the target.
# If the corpus is not longer than seq_length the range is empty and no
# training pairs are produced.
seq_length = 20
window_starts = range(len(text_indices) - seq_length)
sequences = [text_indices[i : i + seq_length] for i in window_starts]
next_char = [text_indices[i + seq_length] for i in window_starts]

# X has one row of seq_length ids per window; y holds the matching target ids.
X, y = np.array(sequences), np.array(next_char)
# Next-character model: embedding -> one SimpleRNN layer -> softmax over the
# vocabulary.
model = Sequential(
    [
        # Declare the input explicitly rather than passing the deprecated
        # `input_length=` argument to Embedding; each sample is a sequence of
        # seq_length integer character ids.
        tf.keras.Input(shape=(seq_length,), dtype="int32"),
        Embedding(input_dim=len(chars), output_dim=50),
        # return_sequences=False: only the final hidden state is passed on,
        # because a single next character is predicted per window.
        SimpleRNN(100, return_sequences=False),
        Dense(len(chars), activation="softmax"),
    ]
)

# The targets in y are integer ids (not one-hot vectors), hence the sparse
# variant of categorical cross-entropy.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
model.fit(X, y, batch_size=64, epochs=50)
# Greedy character-by-character generation: repeatedly feed the most recent
# characters to the model and append its most probable next character.
seed_text = "This is a sample te"
generated_text = seed_text
num_chars_to_generate = 100

generated_pieces = [seed_text]
for _ in range(num_chars_to_generate):
    # Never feed more than seq_length characters to the model.
    seed_indices = [char_to_index[char] for char in seed_text[-seq_length:]]
    if len(seed_indices) < seq_length:
        # Left-pad short contexts up to seq_length. Id 0 is an ordinary
        # vocabulary entry (the first of the sorted characters), not a
        # dedicated padding symbol.
        diff = seq_length - len(seed_indices)
        seed_indices = [0] * diff + seed_indices
    seed_indices = np.array(seed_indices).reshape(1, -1)

    # verbose=0 suppresses the per-call progress bar that would otherwise be
    # printed once for every generated character.
    next_index = int(model.predict(seed_indices, verbose=0).argmax())
    # NOTE: this rebinds `next_char`, which earlier in the script held the
    # list of training targets (already copied into y by this point).
    next_char = index_to_char[next_index]
    generated_pieces.append(next_char)

    # Keep the last seq_length characters as context. Dropping the first
    # character unconditionally would pin the context at the seed's length,
    # so a seed shorter than seq_length would be padded on every step.
    seed_text = (seed_text + next_char)[-seq_length:]

generated_text = "".join(generated_pieces)
print(generated_text)
aW1wb3J0IG51bXB5IGFzIG5wCmltcG9ydCB0ZW5zb3JmbG93IGFzIHRmCmZyb20gdGVuc29yZmxvdy5rZXJhcy5tb2RlbHMgaW1wb3J0IFNlcXVlbnRpYWwKZnJvbSB0ZW5zb3JmbG93LmtlcmFzLmxheWVycyBpbXBvcnQgRW1iZWRkaW5nLCBTaW1wbGVSTk4sIERlbnNlCnRleHQgPSAiVGhpcyBpcyBhIHNhbXBsZSB0ZXh0IGZvciBsYW5ndWFnZSBtb2RlbGluZyB1c2luZyBSTk4uIgpjaGFycyA9IHNvcnRlZChzZXQodGV4dCkpCmNoYXJfdG9faW5kZXggPSB7Y2hhcjogaW5kZXggZm9yIGluZGV4LCBjaGFyIGluIGVudW1lcmF0ZShjaGFycyl9CmluZGV4X3RvX2NoYXIgPSB7aW5kZXg6IGNoYXIgZm9yIGluZGV4LCBjaGFyIGluIGVudW1lcmF0ZShjaGFycyl9CnRleHRfaW5kaWNlcyA9IFtjaGFyX3RvX2luZGV4W2NoYXJdIGZvciBjaGFyIGluIHRleHRdCnNlcV9sZW5ndGgsc2VxdWVuY2VzLG5leHRfY2hhciA9IDIwLFtdLFtdCmZvciBpIGluIHJhbmdlKDAsIGxlbih0ZXh0X2luZGljZXMpIC0gc2VxX2xlbmd0aCk6CiBzZXF1ZW5jZXMuYXBwZW5kKHRleHRfaW5kaWNlc1tpIDogaSArIHNlcV9sZW5ndGhdKQogbmV4dF9jaGFyLmFwcGVuZCh0ZXh0X2luZGljZXNbaSArIHNlcV9sZW5ndGhdKQpYLHkgPSBucC5hcnJheShzZXF1ZW5jZXMpLG5wLmFycmF5KG5leHRfY2hhcikKMTcKbW9kZWwgPSBTZXF1ZW50aWFsKFtFbWJlZGRpbmcoaW5wdXRfZGltPWxlbihjaGFycyksIG91dHB1dF9kaW09NTAsIGlucHV0X2xlbmd0aD1zZXFfbGVuZ3RoKSxTaW1wbGVSTk4oMTAwLCByZXR1cm5fc2VxdWVuY2VzPUZhbHNlKSxEZW5zZShsZW4oY2hhcnMpLCBhY3RpdmF0aW9uPSJzb2Z0bWF4IildKQptb2RlbC5jb21waWxlKGxvc3M9InNwYXJzZV9jYXRlZ29yaWNhbF9jcm9zc2VudHJvcHkiLCBvcHRpbWl6ZXI9ImFkYW0iKQptb2RlbC5maXQoWCwgeSwgYmF0Y2hfc2l6ZT02NCwgZXBvY2hzPTUwKQpzZWVkX3RleHQgPSAiVGhpcyBpcyBhIHNhbXBsZSB0ZSIKZ2VuZXJhdGVkX3RleHQgPSBzZWVkX3RleHQKbnVtX2NoYXJzX3RvX2dlbmVyYXRlID0gMTAwCmZvciBfIGluIHJhbmdlKG51bV9jaGFyc190b19nZW5lcmF0ZSk6CiBzZWVkX2luZGljZXMgPSBbY2hhcl90b19pbmRleFtjaGFyXSBmb3IgY2hhciBpbiBzZWVkX3RleHRdCiBpZiBsZW4oc2VlZF9pbmRpY2VzKSA8IHNlcV9sZW5ndGg6CiAgICAgZGlmZiA9IHNlcV9sZW5ndGggLSBsZW4oc2VlZF9pbmRpY2VzKQogICAgIHNlZWRfaW5kaWNlcyA9IFswXSAqIGRpZmYgKyBzZWVkX2luZGljZXMKIHNlZWRfaW5kaWNlcyA9IG5wLmFycmF5KHNlZWRfaW5kaWNlcykucmVzaGFwZSgxLCAtMSkKIG5leHRfaW5kZXggPSBtb2RlbC5wcmVkaWN0KHNlZWRfaW5kaWNlcykuYXJnbWF4KCkKIG5leHRfY2hhciA9IGluZGV4X3RvX2NoYXJbbmV4dF9pbmRleF0KIGdlbmVyYXRlZF90ZXh0ICs9IG5leHRfY2hhcgogc2VlZF90ZXh0ID0gc2VlZF90ZXh0WzE6XSArIG5leHRfY2hhcgpwcmludChnZW5lcmF0ZWRf
dGV4dCk=