@@ -102,7 +102,7 @@ public void test02() throws Exception {
102
102
}
103
103
104
104
@ Test
105
- public void test03 () throws Exception {
105
+ public void test03 () throws Exception {
106
106
String fileName = "D:\\ Program Files (x86)\\ Reader_v1.9.3.2\\ 天命大反派.txt" ;
107
107
if (!FileUtil .exist (fileName )) {
108
108
File file = new File (fileName );
@@ -134,18 +134,19 @@ public void test03() throws Exception {
134
134
WebDriver chromeDriver = new ChromeDriver (chromeOptions );
135
135
136
136
TestSelenium .text (chromeDriver , "https://www.xs123.org/xs/33/33112/21404025.html" , fileName );
137
+ // TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/70349769.html", fileName);
137
138
// TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/70963309.html", fileName);
138
139
139
140
chromeDriver .close ();
140
141
}
141
142
142
- public static void text (WebDriver chromeDriver , String url , String fileName ) {
143
+ public static void text (WebDriver chromeDriver , String url , String fileName ) throws Exception {
143
144
chromeDriver .get (url );
145
+ Thread .sleep (100 );
144
146
WebElement boxWebElement = chromeDriver .findElement (By .className ("box_con" ));
145
147
WebElement titleElement = boxWebElement .findElement (By .tagName ("h1" ));
146
148
147
- // System.out.println(titleElement.getText());
148
- if (titleElement .getText ().contains ("950" )) {
149
+ if (titleElement .getText ().contains ("1030" )) {
149
150
return ;
150
151
}
151
152
if ("玄幻:我!天命大反派" .equals (titleElement .getText ())) {
@@ -154,17 +155,43 @@ public static void text(WebDriver chromeDriver, String url, String fileName) {
154
155
155
156
// 标题
156
157
List <String > lines = new ArrayList <>();
157
- lines .add ("第" + titleElement .getText ().substring (0 ,4 ) + "章 " + titleElement .getText ().substring (4 ));
158
- lines .add ("" );
158
+ String title = "第" + titleElement .getText ().substring (0 ,4 ).trim () + "章 " + titleElement .getText ().substring (4 ).trim ();
159
+ title .replaceAll ("/?" , "" );
160
+ title .replaceAll (":" , "" );
161
+ System .out .println (title );
162
+ lines .add (title );
159
163
160
164
// 正文
161
165
WebElement conWebElement = chromeDriver .findElement (By .id ("content" ));
162
166
String con = conWebElement .getText ();
163
- // con = con.replaceAll("<br/>", "/r/n");
164
- lines .add (con );
167
+ con = con .replaceAll ("<br/>" , "" );
168
+ con = con .replaceAll ("\n " , "" );
169
+ con = con .replaceAll ("," , "," );
170
+ String [] conArray = con .split ("。" );
171
+ for (String text : conArray ) {
172
+ if (text .length () > 50 ) {
173
+ String [] conArray2 = text .split ("," );
174
+ for (int i = 0 ; i < conArray2 .length ; i ++) {
175
+ if (i + 2 < conArray2 .length ) {
176
+ lines .add (conArray2 [i ] + "," + conArray2 [++i ] + "," + conArray2 [++i ] + "," );
177
+ } else if (i + 1 < conArray2 .length ) {
178
+ lines .add (conArray2 [i ] + "," + conArray2 [++i ] + "," );
179
+ } else {
180
+ lines .add (conArray2 [i ] + "。" );
181
+ }
182
+ lines .add ("" );
183
+ }
184
+ lines .add ("" );
185
+ } else {
186
+ lines .add (text + "。" );
187
+ lines .add ("" );
188
+ }
189
+ }
165
190
166
191
FileUtil .appendUtf8Lines (lines , fileName );
167
192
193
+ Thread .sleep (10 );
194
+
168
195
WebElement btnWebElement = chromeDriver .findElement (By .className ("bottem2" ));
169
196
List <WebElement > btnListWebElement = btnWebElement .findElements (By .tagName ("a" ));
170
197
0 commit comments