14
14
15
15
import org .ejml .simple .SimpleMatrix ;
16
16
17
+ import edu .stanford .nlp .coref .fastneural .FastNeuralCorefModel ;
18
+ import edu .stanford .nlp .coref .neural .EmbeddingExtractor ;
17
19
import edu .stanford .nlp .coref .neural .NeuralCorefModel ;
18
20
import edu .stanford .nlp .io .IOUtils ;
19
21
import edu .stanford .nlp .parser .dvparser .DVModel ;
@@ -34,7 +36,7 @@ public enum Stage {
34
36
}
35
37
36
38
/**
 * The kinds of model this converter can process.  The value is chosen
 * on the command line with the <code>-model</code> property.
 */
public enum Model {
  SENTIMENT,
  DVPARSER,
  COREF,
  EMBEDDING,
  FASTCOREF
}
39
41
40
42
/**
@@ -222,6 +224,7 @@ public static void writeEmbedding(Embedding embedding, ObjectOutputStream out)
222
224
public static Embedding readEmbedding (ObjectInputStream in )
223
225
throws IOException , ClassNotFoundException
224
226
{
227
+
225
228
Function <List <List <Double >>, SimpleMatrix > f = (x ) -> toMatrix (x );
226
229
Map <String , List <List <Double >>> map = ErasureUtils .uncheckedCast (in .readObject ());
227
230
Map <String , SimpleMatrix > vectors = transformMap (map , f );
@@ -267,6 +270,48 @@ public static NeuralCorefModel readCoref(ObjectInputStream in)
267
270
return model ;
268
271
}
269
272
273
+ public static void writeFastCoref (FastNeuralCorefModel model , ObjectOutputStream out )
274
+ throws IOException
275
+ {
276
+ Function <SimpleMatrix , List <List <Double >>> f = (SimpleMatrix x ) -> fromMatrix (x );
277
+
278
+ EmbeddingExtractor embedding = model .getEmbeddingExtractor ();
279
+ out .writeObject (embedding .isConll ());
280
+ Embedding staticEmbedding = embedding .getStaticWordEmbeddings ();
281
+ if (staticEmbedding == null ) {
282
+ out .writeObject (false );
283
+ } else {
284
+ out .writeObject (true );
285
+ writeEmbedding (staticEmbedding , out );
286
+ }
287
+ writeEmbedding (embedding .getTunedWordEmbeddings (), out );
288
+ out .writeObject (embedding .getNAEmbedding ());
289
+
290
+ out .writeObject (model .getPairFeatureIds ());
291
+ out .writeObject (model .getMentionFeatureIds ());
292
+ out .writeObject (CollectionUtils .transformAsList (model .getAllWeights (), f ));
293
+ }
294
+
295
+ public static FastNeuralCorefModel readFastCoref (ObjectInputStream in )
296
+ throws IOException , ClassNotFoundException
297
+ {
298
+ Function <List <List <Double >>, SimpleMatrix > f = (x ) -> toMatrix (x );
299
+
300
+ boolean conll = ErasureUtils .uncheckedCast (in .readObject ());
301
+ boolean hasStatic = ErasureUtils .uncheckedCast (in .readObject ());
302
+ Embedding staticEmbedding = (hasStatic ) ? readEmbedding (in ) : null ;
303
+ Embedding tunedEmbedding = readEmbedding (in );
304
+ String naEmbedding = ErasureUtils .uncheckedCast (in .readObject ());
305
+
306
+ EmbeddingExtractor embedding = new EmbeddingExtractor (conll , staticEmbedding , tunedEmbedding , naEmbedding );
307
+
308
+ Map <String , Integer > pairFeatures = ErasureUtils .uncheckedCast (in .readObject ());
309
+ Map <String , Integer > mentionFeatures = ErasureUtils .uncheckedCast (in .readObject ());
310
+ List <SimpleMatrix > weights = CollectionUtils .transformAsList (ErasureUtils .uncheckedCast (in .readObject ()), f );
311
+
312
+ return new FastNeuralCorefModel (embedding , pairFeatures , mentionFeatures , weights );
313
+ }
314
+
270
315
/**
271
316
* This program converts a sentiment model or an RNN parser model
272
317
* from EJML v23, used by CoreNLP 3.9.2, to a more recent version of
@@ -314,6 +359,12 @@ public static NeuralCorefModel readCoref(ObjectInputStream in)
314
359
* <br>
315
360
* <code> java edu.stanford.nlp.neural.ConvertModels -stage NEW -model EMBEDDING -input /scr/nlp/data/coref/models/neural/english/english-embeddings.INT.ser.gz -output /scr/nlp/data/coref/models/neural/english/english-embeddings.e39.ser.gz</code>
316
361
* <br>
362
+ * There is another coref model which is not currently used in CoreNLP, but it might be in the future. To upgrade it, use <code>-model FASTCOREF</code>
363
+ * <br>
364
+ * <code> java edu.stanford.nlp.neural.ConvertModels -stage OLD -model FASTCOREF -input /scr/nlp/data/coref/models/fastneural/fast-english-model.e38.ser.gz -output /scr/nlp/data/coref/models/fastneural/fast-english-model.INT.ser.gz</code>
365
+ * <br>
366
+ * <code> java edu.stanford.nlp.neural.ConvertModels -stage NEW -model FASTCOREF -input /scr/nlp/data/coref/models/fastneural/fast-english-model.INT.ser.gz -output /scr/nlp/data/coref/models/fastneural/fast-english-model.e39.ser.gz</code>
367
+ * <br>
317
368
*
318
369
* @author <a href="mailto:horatio@gmail.com">John Bauer</a>
319
370
*/
@@ -331,7 +382,7 @@ public static void main(String[] args) throws IOException, ClassNotFoundExceptio
331
382
try {
332
383
modelType = Model .valueOf (props .getProperty ("model" ).toUpperCase ());
333
384
} catch (IllegalArgumentException | NullPointerException e ) {
334
- throw new IllegalArgumentException ("Please specify -model, either SENTIMENT, DVPARSER, EMBEDDING, COREF" );
385
+ throw new IllegalArgumentException ("Please specify -model, either SENTIMENT, DVPARSER, EMBEDDING, COREF, FASTCOREF " );
335
386
}
336
387
337
388
if (!props .containsKey ("input" )) {
@@ -399,6 +450,18 @@ public static void main(String[] args) throws IOException, ClassNotFoundExceptio
399
450
in .close ();
400
451
IOUtils .writeObjectToFile (model , outputPath );
401
452
}
453
+ } else if (modelType == Model .FASTCOREF ) {
454
+ if (stage == Stage .OLD ) {
455
+ FastNeuralCorefModel model = ErasureUtils .uncheckedCast (IOUtils .readObjectFromURLOrClasspathOrFileSystem (inputPath ));
456
+ ObjectOutputStream out = IOUtils .writeStreamFromString (outputPath );
457
+ writeFastCoref (model , out );
458
+ out .close ();
459
+ } else {
460
+ ObjectInputStream in = IOUtils .readStreamFromString (inputPath );
461
+ FastNeuralCorefModel model = readFastCoref (in );
462
+ in .close ();
463
+ IOUtils .writeObjectToFile (model , outputPath );
464
+ }
402
465
} else {
403
466
throw new IllegalArgumentException ("Unknown model type " + modelType );
404
467
}
0 commit comments