MemoryError: Unable to allocate 29.3 GiB for an array with shape (2211861,) and data type <U3551 #607
rohankarande2023 asked this question in Q&A (unanswered)
I'm getting a MemoryError while creating a character-level tokenizer for the PubMed_200k_RCT_numbers_replaced_with_at_sign NLP project.
```python
import tensorflow as tf

# Create character-level tokenizer
# (Num_Char_Tokens, char_per_sentence, and train_chars are defined in earlier cells)
char_vectorizer = tf.keras.layers.TextVectorization(
    max_tokens=Num_Char_Tokens,
    output_sequence_length=char_per_sentence,
    name="char_vectorizer",
)

# Adapt character vectorizer to training characters
char_vectorizer.adapt(train_chars)
```
```
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
Cell In[256], line 3
      1 # Adapt character vectorizer to training characters
----> 3 char_vectorizer.adapt(train_chars).batch(32)

File ~\anaconda3\lib\site-packages\keras\src\layers\preprocessing\text_vectorization.py:473, in TextVectorization.adapt(self, data, batch_size, steps)
    423 def adapt(self, data, batch_size=None, steps=None):
    424     """Computes a vocabulary of string terms from tokens in a dataset.
    425
    426     Calling `adapt()` on a `TextVectorization` layer is an alternative to
   (...)
    471     argument is not supported with array inputs.
    472     """
--> 473 super().adapt(data, batch_size=batch_size, steps=steps)

File ~\anaconda3\lib\site-packages\keras\src\engine\base_preprocessing_layer.py:246, in PreprocessingLayer.adapt(self, data, batch_size, steps)
    244 if self.built:
    245     self.reset_state()
--> 246 data_handler = data_adapter.DataHandler(
    247     data,
    248     batch_size=batch_size,
    249     steps_per_epoch=steps,
    250     epochs=1,
    251     steps_per_execution=self._steps_per_execution,
    252     distribute=False,
    253 )
    254 self._adapt_function = self.make_adapt_function()
    255 for _, iterator in data_handler.enumerate_epochs():

File ~\anaconda3\lib\site-packages\keras\src\engine\data_adapter.py:1285, in DataHandler.__init__(self, x, y, sample_weight, batch_size, steps_per_epoch, initial_epoch, epochs, shuffle, class_weight, max_queue_size, workers, use_multiprocessing, model, steps_per_execution, distribute, pss_evaluation_shards)
   1282 self._steps_per_execution = steps_per_execution
   1284 adapter_cls = select_data_adapter(x, y)
-> 1285 self._adapter = adapter_cls(
   1286     x,
   1287     y,
   1288     batch_size=batch_size,
   1289     steps=steps_per_epoch,
   1290     epochs=epochs - initial_epoch,
   1291     sample_weights=sample_weight,
   1292     shuffle=shuffle,
   1293     max_queue_size=max_queue_size,
   1294     workers=workers,
   1295     use_multiprocessing=use_multiprocessing,
   1296     distribution_strategy=tf.distribute.get_strategy(),
   1297     model=model,
   1298     pss_evaluation_shards=pss_evaluation_shards,
   1299 )
   1301 strategy = tf.distribute.get_strategy()
   1303 self._current_step = 0

File ~\anaconda3\lib\site-packages\keras\src\engine\data_adapter.py:714, in ListsOfScalarsDataAdapter.__init__(self, x, y, sample_weights, sample_weight_modes, batch_size, shuffle, **kwargs)
    703 def __init__(
    704     self,
    705     x,
   (...)
    711     **kwargs,
    712 ):
    713     super().__init__(x, y, **kwargs)
--> 714 x = np.asarray(x)
    715 if y is not None:
    716     y = np.asarray(y)

MemoryError: Unable to allocate 29.3 GiB for an array with shape (2211861,) and data type <U3551
```
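For what it's worth, the reported size is consistent with NumPy's fixed-width unicode storage: a `<U3551` array stores every element as 3551 UTF-32 code points (4 bytes each), regardless of the actual string length. A quick back-of-the-envelope check (my own arithmetic, using the numbers from the error message):

```python
n_strings = 2_211_861   # array shape reported in the error
max_chars = 3_551       # width of the <U3551 dtype
bytes_per_char = 4      # NumPy unicode arrays store UTF-32

print(n_strings * max_chars * bytes_per_char / 2**30)  # ~29.3 (GiB)
```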
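In case it helps anyone hitting the same error: passing a plain Python list makes `adapt()` call `np.asarray(x)` (visible at the bottom of the traceback), which materializes the whole dataset as one padded unicode array. A possible workaround, sketched under the assumption that `train_chars` is a list of Python strings, is to wrap it in a batched `tf.data.Dataset` so `adapt()` streams batches instead:

```python
import tensorflow as tf

# Workaround sketch: stream train_chars in batches so adapt() never
# converts the full list into a single padded NumPy unicode array.
# tf.string tensors store raw bytes per element, not fixed-width UTF-32.
char_dataset = tf.data.Dataset.from_tensor_slices(train_chars).batch(1024)

char_vectorizer = tf.keras.layers.TextVectorization(
    max_tokens=Num_Char_Tokens,               # defined in earlier cells
    output_sequence_length=char_per_sentence,
    name="char_vectorizer",
)
char_vectorizer.adapt(char_dataset)
```

If even the in-memory string tensor is too large, a `tf.data.Dataset.from_generator` (or `tf.data.TextLineDataset` over the raw file) should avoid loading the full list at once.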