77import ml .anon .anonymization .model .Anonymization ;
88import ml .anon .docmgmt .service .TokenizerService ;
99import org .springframework .beans .factory .annotation .Autowired ;
10+ import org .springframework .stereotype .Component ;
1011
1112import java .lang .reflect .Array ;
1213import java .util .*;
1516 * Compares the originals of the anonymizations and dismisses duplicates
1617 * Created by matthias on 20.08.2017
1718 */
19+ @ Component
1820public class ListPreparation {
1921
2022 @ Autowired
21- TokenizerService tokenizerService = new TokenizerService () ;
23+ private TokenizerService tokenizerService ;
2224
2325 /**
2426 * Removes the duplicates by looking at the original value of the anonymization objects
27+ *
2528 * @param anonymizations list of anonymizations from ML and rule-based approaches
2629 * @return the list of anonymizations without duplicates
2730 */
2831 private ArrayList <Anonymization > removeDuplicates (List <Anonymization > anonymizations ) {
2932 ArrayList <Anonymization > noDuplicate = new ArrayList <Anonymization >();
3033
3134 ObjectMapper mapper = new ObjectMapper ();
32- anonymizations = mapper .convertValue (anonymizations , new TypeReference <List <Anonymization >>(){});
35+ anonymizations = mapper .convertValue (anonymizations , new TypeReference <List <Anonymization >>() {
36+ });
3337 boolean contained = false ;
3438 for (Anonymization anon1 : anonymizations ) {
3539 for (Anonymization anon2 : noDuplicate ) {
36- if (anon1 .getData ().getOriginal ().equals (anon2 .getData ().getOriginal ())){
40+ if (anon1 .getData ().getOriginal ().equals (anon2 .getData ().getOriginal ())) {
3741 contained = true ;
3842 break ;
3943 }
4044 }
41- if (!contained ){
45+ if (!contained ) {
4246 noDuplicate .add (anon1 );
4347 }
4448 contained = false ;
@@ -49,15 +53,16 @@ private ArrayList<Anonymization> removeDuplicates(List<Anonymization> anonymizat
4953 /**
5054 * Sorts the Anonymization list by the number of tokens the original holds, to cope with encapsulated
5155 * anonymizations.
56+ *
5257 * @param anonymizations list to sort by token number
5358 * @return the sorted list of {@link Anonymization}s
5459 */
5560 private List <Anonymization > sortByTokenNumber (List <Anonymization > anonymizations ) {
5661
5762 HashMap <Anonymization , Integer > anonymizationTokenNumber = new HashMap <>();
5863 anonymizations .forEach (anonymization ->
59- anonymizationTokenNumber .put (anonymization , tokenizerService
60- .tokenize (anonymization .getData ().getOriginal ()).size ())
64+ anonymizationTokenNumber .put (anonymization , tokenizerService
65+ .tokenize (anonymization .getData ().getOriginal ()).size ())
6166 );
6267
6368 Collections .sort (anonymizations ,
@@ -70,10 +75,11 @@ private List<Anonymization> sortByTokenNumber(List<Anonymization> anonymizations
7075
7176 /**
7277 * Removes duplicates and then sorts the list by number of tokens
78+ *
7379 * @param anonymizations list to apply operations on
7480 * @return sorted {@link Anonymization} list without duplicates
7581 */
76- public List <Anonymization > prepareAnonymizationList (List <Anonymization > anonymizations ){
82+ public List <Anonymization > prepareAnonymizationList (List <Anonymization > anonymizations ) {
7783
7884 anonymizations = this .removeDuplicates (anonymizations );
7985 anonymizations = this .sortByTokenNumber (anonymizations );
0 commit comments