@@ -111,6 +111,20 @@ def alignWords(self,sourceSent, targetSent, sourceParseResult, targetParseResult
111
111
if item [1 ] not in tarWordAlreadyAligned :
112
112
tarWordAlreadyAligned .append (item [1 ])
113
113
114
+ aligned_nouns = self .align_Nouns (self .sourceWordIndices , self .targetWordIndices , sourceWords , targetWords ,
115
+ self .sourceLemmas , self .targetLemmas , self .sourcePosTags , self .targetPosTags ,
116
+ sourceParseResult , targetParseResult , alignments , srcWordAlreadyAligned , tarWordAlreadyAligned )
117
+
118
+ print "aligned nouns " , aligned_nouns
119
+
120
+ for item in aligned_nouns :
121
+ if item not in alignments :
122
+ alignments .append (item )
123
+ if item [0 ] not in srcWordAlreadyAligned :
124
+ srcWordAlreadyAligned .append (item [0 ])
125
+ if item [1 ] not in tarWordAlreadyAligned :
126
+ tarWordAlreadyAligned .append (item [1 ])
127
+
114
128
return alignments
115
129
116
130
@@ -745,4 +759,258 @@ def align_mainVerbs(self, srcWordIndices, tarWordIndices, srcWords, tarWords, sr
745
759
else :
746
760
break
747
761
748
- return AlignedVerbs
762
+ return AlignedVerbs
763
+
764
+
765
+ '''
766
+ Returns Aligned Nouns
767
+ '''
768
+
769
def align_Nouns(self, srcWordIndices, tarWordIndices, srcWords, tarWords, srcLemmas,
                tarLemmas, srcPosTags, tarPosTags, sourceParseResult, targetParseResult, existingalignments,
                srcWordAlreadyAligned, tarWordAlreadyAligned):
    '''
    Align nouns of the source sentence with nouns of the target sentence.

    Phase 1 (evidence collection): every not-yet-aligned source/target pair
    whose POS tags start with 'n' or equal 'prp', and whose word/lemma
    similarity reaches ppdbSim, collects contextual evidence from
      * similar children under equal or equivalent relations,
      * similar parents under equal or equivalent relations,
      * parent/child and child/parent pairs under opposite-direction
        relations (delegated to findEquivalentRelationAlignNouns).

    Phase 2 (greedy alignment): at most one pass per counted source word;
    each pass picks the pair maximising
        theta1 * wordSimilarity + (1 - theta1) * evidence
    and also aligns the relative pairs that supported it.

    Word indices are 1-based: position i reads srcWords[i - 1] etc.
    Relatives returned by self.util.findParents / findChildren are indexed
    as k[0] (word index), k[1] (word) and k[2] (relation name).

    Side effects: indices of every newly aligned word are appended to
    srcWordAlreadyAligned and tarWordAlreadyAligned.

    Returns the list of new alignments, each a [sourceIndex, targetIndex] list.
    '''

    nounAlignments = []
    numberofNounsInSource = 0
    evidenceCountMatrix = {}
    relativeAlignmentsMatrix = {}  # (i, j) -> relative pairs [srcIdx, tarIdx] that supported the pair
    wordSimilarity = {}  # (i, j) -> similarity score of the two words

    similarity = self.word_similarity.computeWordSimilarityScore

    sourceDependencyParse = self.util.dependencyTreeWithOffSets(sourceParseResult)
    targetDependencyParse = self.util.dependencyTreeWithOffSets(targetParseResult)

    # Relation groups are constants, so they are built once instead of per word pair.
    # Fixed: 'nn' and 'prep_of' were fused into 'nnprep_of' by a missing comma.
    childRelationGroups = [
        ['pos', 'nn', 'prep_of', 'prep_in', 'prep_at', 'prep_for'],  # noun child
        ['infmod', 'partmod', 'rcmod'],  # verb child
        ['amod', 'rcmod'],  # adjective child
    ]
    # Fixed: 'tmod' and 'prep_in' were fused into 'tmodprep_in' by a missing comma.
    parentRelationGroups = [
        ['pos', 'nn', 'prep_of', 'prep_in', 'prep_at', 'prep_for'],  # noun parent
        ['agent', 'nsubj', 'xsubj'],  # verb parent, group 1
        ['ccomp', 'dobj', 'nsubjpass', 'rel', 'partmod'],  # verb parent, group 2
        ['tmod', 'prep_in', 'prep_at', 'prep_on'],  # verb parent, group 3
        ['iobj', 'prep_to'],  # verb parent, group 4
    ]

    groupOfSimilarRelationsInOppositeDirectionForAdjectiveParentAndChild = [['nsubj'], ['amod', 'rcmod']]
    groupOfSimilarRelationsInOppositeDirectionForVerbParentAndChild = [
        ['ccomp', 'dobj', 'nsubjpass', 'rel', 'partmod'], ['infmod', 'partmod', 'rcmod']]
    group1OfSimilarRelationsInOppositeDirectionForNounParentAndChild = [['conj_and'], ['conj_and']]
    group2OfSimilarRelationsInOppositeDirectionForNounParentAndChild = [['conj_or'], ['conj_or']]
    group3OfSimilarRelationsInOppositeDirectionForNounParentAndChild = [['conj_nor'], ['conj_nor']]

    def isNounOrPronoun(posTag):
        # Candidate words: tag starts with 'n', or the tag is exactly 'prp'.
        return posTag[0].lower() == 'n' or posTag.lower() == 'prp'

    def relationsAreEquivalent(srcRelation, tarRelation, relationGroups):
        # Equal relations, or both relations belonging to the same group.
        # Fixed: the group-4 parent check used to test the source relation twice.
        if srcRelation == tarRelation:
            return True
        return any(srcRelation in group and tarRelation in group for group in relationGroups)

    def collectSameDirectionEvidence(i, j, sourceRelatives, targetRelatives, relationGroups):
        # Credit pair (i, j) for every (source relative, target relative) pair that supports it.
        knownAlignments = existingalignments + nounAlignments
        for k in sourceRelatives:
            for l in targetRelatives:
                # Computed once per pair; the score is both the test value and the evidence weight.
                score = max(similarity(k[1], srcPosTags[k[0] - 1], l[1], tarPosTags[l[0] - 1]),
                            similarity(srcLemmas[k[0] - 1], srcPosTags[k[0] - 1],
                                       tarLemmas[l[0] - 1], tarPosTags[l[0] - 1]))
                # NOTE(review): the grouping "aligned OR (similar AND equivalent relation)" is
                # kept as written; an already-aligned relative pair counts without any relation
                # check. Also, the probe is a tuple while nounAlignments stores lists, so the
                # membership test can only match tuple entries - confirm both are intended.
                if (k[0], l[0]) in knownAlignments or \
                        (score >= ppdbSim and relationsAreEquivalent(k[2], l[2], relationGroups)):
                    evidenceCountMatrix[(i, j)] = evidenceCountMatrix.get((i, j), 0) + score
                    relativeAlignmentsMatrix.setdefault((i, j), []).append([k[0], l[0]])

    # ---- Phase 1: collect similarity scores and contextual evidence ----
    for i in srcWordIndices:

        if i in srcWordAlreadyAligned or not isNounOrPronoun(srcPosTags[i - 1]):
            continue

        numberofNounsInSource += 1

        for j in tarWordIndices:

            if j in tarWordAlreadyAligned or not isNounOrPronoun(tarPosTags[j - 1]):
                continue

            # Best of surface-form similarity and lemma similarity.
            getSimilarityScore = max(
                similarity(srcWords[i - 1], srcPosTags[i - 1], tarWords[j - 1], tarPosTags[j - 1]),
                similarity(srcLemmas[i - 1], srcPosTags[i - 1], tarLemmas[j - 1], tarPosTags[j - 1]))
            if getSimilarityScore < ppdbSim:
                continue

            wordSimilarity[(i, j)] = getSimilarityScore

            sourceWordParents = self.util.findParents(sourceDependencyParse, i, srcWords[i - 1])
            sourceWordChildren = self.util.findChildren(sourceDependencyParse, i, srcWords[i - 1])
            targetWordParents = self.util.findParents(targetDependencyParse, j, tarWords[j - 1])
            targetWordChildren = self.util.findChildren(targetDependencyParse, j, tarWords[j - 1])

            # search for common or equivalent children
            collectSameDirectionEvidence(i, j, sourceWordChildren, targetWordChildren, childRelationGroups)

            # search for common or equivalent parents
            collectSameDirectionEvidence(i, j, sourceWordParents, targetWordParents, parentRelationGroups)

            # search for equivalent parent-child relations
            evidenceCountMatrix, relativeAlignmentsMatrix = self.findEquivalentRelationAlignNouns(
                i, j, sourceWordParents, targetWordChildren,
                nounAlignments, existingalignments,
                srcPosTags, tarPosTags, srcLemmas, tarLemmas,
                groupOfSimilarRelationsInOppositeDirectionForAdjectiveParentAndChild[0],
                groupOfSimilarRelationsInOppositeDirectionForAdjectiveParentAndChild[1],
                groupOfSimilarRelationsInOppositeDirectionForVerbParentAndChild[0],
                groupOfSimilarRelationsInOppositeDirectionForVerbParentAndChild[1],
                group1OfSimilarRelationsInOppositeDirectionForNounParentAndChild[0],
                group1OfSimilarRelationsInOppositeDirectionForNounParentAndChild[1],
                group2OfSimilarRelationsInOppositeDirectionForNounParentAndChild[0],
                group2OfSimilarRelationsInOppositeDirectionForNounParentAndChild[1],
                group3OfSimilarRelationsInOppositeDirectionForNounParentAndChild[0],
                group3OfSimilarRelationsInOppositeDirectionForNounParentAndChild[1],
                evidenceCountMatrix, relativeAlignmentsMatrix)

            # search for equivalent child-parent relations (same groups, sides swapped)
            evidenceCountMatrix, relativeAlignmentsMatrix = self.findEquivalentRelationAlignNouns(
                i, j, sourceWordChildren, targetWordParents,
                nounAlignments, existingalignments,
                srcPosTags, tarPosTags, srcLemmas, tarLemmas,
                groupOfSimilarRelationsInOppositeDirectionForAdjectiveParentAndChild[1],
                groupOfSimilarRelationsInOppositeDirectionForAdjectiveParentAndChild[0],
                groupOfSimilarRelationsInOppositeDirectionForVerbParentAndChild[1],
                groupOfSimilarRelationsInOppositeDirectionForVerbParentAndChild[0],
                group1OfSimilarRelationsInOppositeDirectionForNounParentAndChild[1],
                group1OfSimilarRelationsInOppositeDirectionForNounParentAndChild[0],
                group2OfSimilarRelationsInOppositeDirectionForNounParentAndChild[1],
                group2OfSimilarRelationsInOppositeDirectionForNounParentAndChild[0],
                group3OfSimilarRelationsInOppositeDirectionForNounParentAndChild[1],
                group3OfSimilarRelationsInOppositeDirectionForNounParentAndChild[0],
                evidenceCountMatrix, relativeAlignmentsMatrix)

    # ---- Phase 2: use collected stats to align ----
    # range() instead of xrange() so the loop runs on both Python 2 and Python 3.
    for _ in range(numberofNounsInSource):

        maxEvidenceCountForCurrentPass = 0
        maxOverallValueForCurrentPass = 0
        indexPairWithStrongestTieForCurrentPass = [-1, -1]  # indexes of aligned nouns

        # NOTE(review): phase 1 also collects evidence for 'prp' words, but this phase only
        # considers tags starting with 'n' (and skips stopword lemmas) - confirm intended.
        for i in srcWordIndices:
            if i in srcWordAlreadyAligned or srcPosTags[i - 1][0].lower() != 'n' or \
                    srcLemmas[i - 1] in stopwords:
                continue

            for j in tarWordIndices:
                if j in tarWordAlreadyAligned or tarPosTags[j - 1][0].lower() != 'n' or \
                        tarLemmas[j - 1] in stopwords:
                    continue

                if (i, j) not in evidenceCountMatrix:
                    continue

                overallValue = theta1 * wordSimilarity[(i, j)] + \
                    (1 - theta1) * evidenceCountMatrix[(i, j)]
                if overallValue > maxOverallValueForCurrentPass:
                    maxOverallValueForCurrentPass = overallValue
                    maxEvidenceCountForCurrentPass = evidenceCountMatrix[(i, j)]
                    indexPairWithStrongestTieForCurrentPass = [i, j]

        # no aligned nouns formed in this pass: later passes cannot find any either
        if maxEvidenceCountForCurrentPass <= 0:
            break

        nounAlignments.append(indexPairWithStrongestTieForCurrentPass)
        srcWordAlreadyAligned.append(indexPairWithStrongestTieForCurrentPass[0])
        tarWordAlreadyAligned.append(indexPairWithStrongestTieForCurrentPass[1])

        for item in relativeAlignmentsMatrix[(indexPairWithStrongestTieForCurrentPass[0],
                                              indexPairWithStrongestTieForCurrentPass[1])]:
            # item[0] and item[1] != 0 so that we should not store Root-0
            if item[0] != 0 and item[1] != 0 and item[0] not in srcWordAlreadyAligned and \
                    item[1] not in tarWordAlreadyAligned:
                nounAlignments.append(item)
                srcWordAlreadyAligned.append(item[0])
                tarWordAlreadyAligned.append(item[1])

    return nounAlignments
967
+
968
+
969
+ '''
970
+ Auxiliary method to find equivalent parent-child / child-parent relations in align nouns
971
+ '''
972
+
973
+
974
def findEquivalentRelationAlignNouns(self, i, j, sourceDepenency, targetDependency, nounAlignments, existingalignments,
                                     srcPosTags, tarPosTags, srcLemmas, tarLemmas, AdjParentAndChildSrc, AdjParentAndChildTar,
                                     OppDirecVerbParentAndChildSrc, OppDirecVerbParentAndChildTar,
                                     group1OppDirectNounParentAndChildSrc, group1OppDirectNounParentAndChildTar,
                                     group2OppDirectNounParentAndChildSrc, group2OppDirectNounParentAndChildTar,
                                     group3OppDirectNounParentAndChildSrc, group3OppDirectNounParentAndChildTar,
                                     evidenceCountMatrix, relativeAlignmentsMatrix):
    '''
    Collect evidence for the word pair (i, j) from relatives linked through
    opposite-direction relations (parent vs. child or child vs. parent).

    sourceDepenency / targetDependency are iterables of relatives indexed as
    k[0] (1-based word index), k[1] (word) and k[2] (relation name).
    Each "...Src" / "...Tar" argument pair is one equivalence group: a source
    relation from the Src list matches a target relation from the Tar list.

    A relative pair (k, l) counts as evidence when (k[0], l[0]) is found in
    existingalignments + nounAlignments, or when the relatives' similarity
    reaches ppdbSim and their relations are equal or equivalent.  Its
    similarity score is added to evidenceCountMatrix[(i, j)] and
    [k[0], l[0]] is appended to relativeAlignmentsMatrix[(i, j)].

    Both matrices are updated in place and also returned as a tuple
    (evidenceCountMatrix, relativeAlignmentsMatrix).
    '''

    similarity = self.word_similarity.computeWordSimilarityScore
    # Neither list changes inside this method, so concatenate once.
    knownAlignments = existingalignments + nounAlignments

    equivalentRelationGroups = (
        (AdjParentAndChildSrc, AdjParentAndChildTar),
        (OppDirecVerbParentAndChildSrc, OppDirecVerbParentAndChildTar),
        (group1OppDirectNounParentAndChildSrc, group1OppDirectNounParentAndChildTar),
        (group2OppDirectNounParentAndChildSrc, group2OppDirectNounParentAndChildTar),
        # Fixed: this group used to test k[2] against both lists, ignoring the target relation.
        (group3OppDirectNounParentAndChildSrc, group3OppDirectNounParentAndChildTar),
    )

    for k in sourceDepenency:
        for l in targetDependency:
            # Best of surface-form and lemma similarity, computed once per pair;
            # it serves both as the threshold test value and as the evidence weight.
            score = max(similarity(k[1], srcPosTags[k[0] - 1], l[1], tarPosTags[l[0] - 1]),
                        similarity(srcLemmas[k[0] - 1], srcPosTags[k[0] - 1],
                                   tarLemmas[l[0] - 1], tarPosTags[l[0] - 1]))

            # NOTE(review): the grouping "aligned OR (similar AND equivalent relation)" is kept
            # as written; an already-aligned relative pair counts without any relation check.
            if (k[0], l[0]) not in knownAlignments:
                if score < ppdbSim:
                    continue
                if k[2] != l[2] and not any(k[2] in srcGroup and l[2] in tarGroup
                                            for srcGroup, tarGroup in equivalentRelationGroups):
                    continue

            evidenceCountMatrix[(i, j)] = evidenceCountMatrix.get((i, j), 0) + score
            relativeAlignmentsMatrix.setdefault((i, j), []).append([k[0], l[0]])

    return evidenceCountMatrix, relativeAlignmentsMatrix
0 commit comments