Skip to content

Commit 78153ad

Browse files
committed
Whitespace
1 parent f6f0053 commit 78153ad

File tree

2 files changed

+65
-66
lines changed

2 files changed

+65
-66
lines changed

src/edu/stanford/nlp/pipeline/WordsToSentencesAnnotator.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,7 @@ public WordsToSentencesAnnotator(Properties properties) {
5353
// also include "\n", in case files use that instead of
5454
// the system separator
5555
// this constructor will keep empty lines as empty sentences
56-
wts1 = new WordToSentenceProcessor<>(ArrayUtils.asImmutableSet(new String[]{System.lineSeparator(), "\n",
57-
AbstractTokenizer.NEWLINE_TOKEN}));
56+
wts1 = new WordToSentenceProcessor<>(ArrayUtils.asImmutableSet(new String[]{System.lineSeparator(), "\n", AbstractTokenizer.NEWLINE_TOKEN}));
5857
}
5958
} else {
6059
// this constructor will keep empty lines as empty sentences

test/src/edu/stanford/nlp/process/WordToSentenceProcessorTest.java

Lines changed: 64 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ public class WordToSentenceProcessorTest extends TestCase {
1919

2020
private static final Annotator ud = new TokenizerAnnotator(false, "en");
2121
private static final Annotator udNL = new TokenizerAnnotator(false, "en", "invertible,tokenizeNLs=true");
22-
private static final Annotator wsNL = new TokenizerAnnotator(false,
23-
PropertiesUtils.asProperties("tokenize.whitespace", "true", "invertible", "true", "tokenizeNLs", "true"));
22+
private static final Annotator wsNL =
23+
new TokenizerAnnotator(false, PropertiesUtils.asProperties("tokenize.whitespace", "true", "invertible", "true", "tokenizeNLs", "true"));
2424

2525
private static final WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<>();
2626
private static final WordToSentenceProcessor<CoreLabel> wtsNull =
27-
new WordToSentenceProcessor<>(true); // treat input as one sentence
28-
private static final WordToSentenceProcessor<CoreLabel> cwts = new WordToSentenceProcessor<>(
29-
"[.。]|[!?!?]+", WordToSentenceProcessor.NewlineIsSentenceBreak.TWO_CONSECUTIVE, false);
27+
new WordToSentenceProcessor<>(true); // treat input as one sentence
28+
private static final WordToSentenceProcessor<CoreLabel> cwts =
29+
new WordToSentenceProcessor<>("[.。]|[!?!?]+", WordToSentenceProcessor.NewlineIsSentenceBreak.TWO_CONSECUTIVE, false);
3030

3131

3232
private static void checkResult(WordToSentenceProcessor<CoreLabel> wts,
@@ -104,103 +104,103 @@ public void testMr() {
104104

105105
public void testNullSplitter() {
106106
checkResult(wtsNull, "This should be one sentence. There is no split.",
107-
"This should be one sentence. There is no split.");
107+
"This should be one sentence. There is no split.");
108108
}
109109

110110
public void testParagraphStrategies() {
111111
final WordToSentenceProcessor<CoreLabel> wtsNever =
112-
new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER);
112+
new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER);
113113
final WordToSentenceProcessor<CoreLabel> wtsAlways =
114-
new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.ALWAYS);
114+
new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.ALWAYS);
115115
final WordToSentenceProcessor<CoreLabel> wtsTwo =
116-
new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.TWO_CONSECUTIVE);
116+
new WordToSentenceProcessor<>(WordToSentenceProcessor.NewlineIsSentenceBreak.TWO_CONSECUTIVE);
117117

118118
String input1 = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.";
119119
String input2 = "Depending on the options,\nthis could be all sorts of things,\n as I like chocolate. And cookies.";
120120
checkResult(wtsNever, input1,
121-
"Depending on the options,\nthis could be all sorts of things,\n\nas I like chocolate.",
122-
"And cookies.");
121+
"Depending on the options,\nthis could be all sorts of things,\n\nas I like chocolate.",
122+
"And cookies.");
123123
checkResult(wtsAlways, input1,
124-
"Depending on the options,",
125-
"this could be all sorts of things,",
126-
"as I like chocolate.",
127-
"And cookies.");
124+
"Depending on the options,",
125+
"this could be all sorts of things,",
126+
"as I like chocolate.",
127+
"And cookies.");
128128
checkResult(wtsTwo, input1,
129-
"Depending on the options, this could be all sorts of things,",
130-
"as I like chocolate.",
131-
"And cookies.");
129+
"Depending on the options, this could be all sorts of things,",
130+
"as I like chocolate.",
131+
"And cookies.");
132132
checkResult(wtsNever, input2,
133-
"Depending on the options,\nthis could be all sorts of things,\nas I like chocolate.",
134-
"And cookies.");
133+
"Depending on the options,\nthis could be all sorts of things,\nas I like chocolate.",
134+
"And cookies.");
135135
checkResult(wtsAlways, input2,
136-
"Depending on the options,",
137-
"this could be all sorts of things,",
138-
"as I like chocolate.",
139-
"And cookies.");
136+
"Depending on the options,",
137+
"this could be all sorts of things,",
138+
"as I like chocolate.",
139+
"And cookies.");
140140
checkResult(wtsTwo, input2,
141-
"Depending on the options,\nthis could be all sorts of things,\nas I like chocolate.",
142-
"And cookies.");
141+
"Depending on the options,\nthis could be all sorts of things,\nas I like chocolate.",
142+
"And cookies.");
143143
String input3 = "Specific descriptions are absent.\n\n''Mossy Head Industrial Park'' it says.";
144144
checkResult(wtsTwo, input3,
145-
"Specific descriptions are absent.",
146-
"''Mossy Head Industrial Park'' it says.");
145+
"Specific descriptions are absent.",
146+
"''Mossy Head Industrial Park'' it says.");
147147
}
148148

149149
public void testXmlElements() {
150150
final WordToSentenceProcessor<CoreLabel> wtsXml =
151-
new WordToSentenceProcessor<>(null, null,null,
152-
Generics.newHashSet(Arrays.asList("p", "chapter")),
153-
WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER, null, null);
151+
new WordToSentenceProcessor<>(null, null,null,
152+
Generics.newHashSet(Arrays.asList("p", "chapter")),
153+
WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER, null, null);
154154

155155
String input1 = "<chapter>Chapter 1</chapter><p>This is text. So is this.</p> <p>One without end</p><p>Another</p><p>And another</p>";
156156
checkResult(wtsXml, input1,
157-
"Chapter 1",
158-
"This is text.",
159-
"So is this.",
160-
"One without end",
161-
"Another",
162-
"And another");
157+
"Chapter 1",
158+
"This is text.",
159+
"So is this.",
160+
"One without end",
161+
"Another",
162+
"And another");
163163
}
164164

165165
public void testRegion() {
166166
final WordToSentenceProcessor<CoreLabel> wtsRegion =
167-
new WordToSentenceProcessor<>(WordToSentenceProcessor.DEFAULT_BOUNDARY_REGEX,
168-
WordToSentenceProcessor.DEFAULT_BOUNDARY_FOLLOWERS_REGEX,
169-
WordToSentenceProcessor.DEFAULT_SENTENCE_BOUNDARIES_TO_DISCARD,
170-
Generics.newHashSet(Collections.singletonList("p")),
171-
"chapter|preface", WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER, null, null, false, false);
167+
new WordToSentenceProcessor<>(WordToSentenceProcessor.DEFAULT_BOUNDARY_REGEX,
168+
WordToSentenceProcessor.DEFAULT_BOUNDARY_FOLLOWERS_REGEX,
169+
WordToSentenceProcessor.DEFAULT_SENTENCE_BOUNDARIES_TO_DISCARD,
170+
Generics.newHashSet(Collections.singletonList("p")),
171+
"chapter|preface", WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER, null, null, false, false);
172172
String input1 = "<title>Chris rules!</title><preface><p>Para one</p><p>Para two</p></preface>" +
173-
"<chapter><p>Text we like. Two sentences \n\n in it.</p></chapter><coda>Some more text here</coda>";
173+
"<chapter><p>Text we like. Two sentences \n\n in it.</p></chapter><coda>Some more text here</coda>";
174174
checkResult(wtsRegion, input1,
175-
"Para one",
176-
"Para two",
177-
"Text we like.",
178-
"Two sentences in it.");
175+
"Para one",
176+
"Para two",
177+
"Text we like.",
178+
"Two sentences in it.");
179179

180180
}
181181

182182
public void testBlankLines() {
183183
final WordToSentenceProcessor<CoreLabel> wtsLines =
184-
new WordToSentenceProcessor<>(Generics.newHashSet(WordToSentenceProcessor.DEFAULT_SENTENCE_BOUNDARIES_TO_DISCARD));
184+
new WordToSentenceProcessor<>(Generics.newHashSet(WordToSentenceProcessor.DEFAULT_SENTENCE_BOUNDARIES_TO_DISCARD));
185185
String input1 = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.";
186186
checkResult(wtsLines, input1,
187-
"Depending on the options,",
188-
"this could be all sorts of things,",
189-
"",
190-
"as I like chocolate. And cookies.");
187+
"Depending on the options,",
188+
"this could be all sorts of things,",
189+
"",
190+
"as I like chocolate. And cookies.");
191191
String input2 = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.\n";
192192
checkResult(wtsLines, input2,
193-
"Depending on the options,",
194-
"this could be all sorts of things,",
195-
"",
196-
"as I like chocolate. And cookies.");
193+
"Depending on the options,",
194+
"this could be all sorts of things,",
195+
"",
196+
"as I like chocolate. And cookies.");
197197
String input3 = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.\n\n";
198198
checkResult(wtsLines, input3,
199-
"Depending on the options,",
200-
"this could be all sorts of things,",
201-
"",
202-
"as I like chocolate. And cookies.",
203-
"");
199+
"Depending on the options,",
200+
"this could be all sorts of things,",
201+
"",
202+
"as I like chocolate. And cookies.",
203+
"");
204204
}
205205

206206
public void testExclamationPoint() {
@@ -225,10 +225,10 @@ public void testChinese() {
225225
*/
226226
public void testParagraphSeparator() {
227227
checkResult(wts, "Hello\u2029World.",
228-
"Hello", "World.");
228+
"Hello", "World.");
229229
checkResult(wts, "Hello.\u2029World.",
230-
"Hello.", "World.");
230+
"Hello.", "World.");
231231
checkResult(wts, "Hello \u2029World.",
232-
"Hello", "World.");
232+
"Hello", "World.");
233233
}
234234
}

0 commit comments

Comments
 (0)