Skip to content

Commit 3ea26f6

Browse files
committed
TestSelenium爬虫小说测试
1 parent 7b10b28 commit 3ea26f6

File tree

1 file changed

+35
-8
lines changed

1 file changed

+35
-8
lines changed

TestWeb/src/test/java/com/wang/other/TestSelenium.java

Lines changed: 35 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ public void test02() throws Exception {
102102
}
103103

104104
@Test
105-
public void test03() throws Exception {
105+
public void test03() throws Exception{
106106
String fileName = "D:\\Program Files (x86)\\Reader_v1.9.3.2\\天命大反派.txt";
107107
if (!FileUtil.exist(fileName)) {
108108
File file = new File(fileName);
@@ -134,18 +134,19 @@ public void test03() throws Exception {
134134
WebDriver chromeDriver = new ChromeDriver(chromeOptions);
135135

136136
TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/21404025.html", fileName);
137+
// TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/70349769.html", fileName);
137138
// TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/70963309.html", fileName);
138139

139140
chromeDriver.close();
140141
}
141142

142-
public static void text(WebDriver chromeDriver, String url, String fileName) {
143+
public static void text(WebDriver chromeDriver, String url, String fileName) throws Exception {
143144
chromeDriver.get(url);
145+
Thread.sleep(100);
144146
WebElement boxWebElement = chromeDriver.findElement(By.className("box_con"));
145147
WebElement titleElement = boxWebElement.findElement(By.tagName("h1"));
146148

147-
// System.out.println(titleElement.getText());
148-
if (titleElement.getText().contains("950")) {
149+
if (titleElement.getText().contains("1030")) {
149150
return;
150151
}
151152
if ("玄幻:我!天命大反派".equals(titleElement.getText())) {
@@ -154,17 +155,43 @@ public static void text(WebDriver chromeDriver, String url, String fileName) {
154155

155156
// 标题
156157
List<String> lines = new ArrayList<>();
157-
lines.add("第" + titleElement.getText().substring(0,4) + "章 " + titleElement.getText().substring(4));
158-
lines.add("");
158+
String title = "第" + titleElement.getText().substring(0,4).trim() + "章 " + titleElement.getText().substring(4).trim();
159+
title.replaceAll("/?", "");
160+
title.replaceAll(":", "");
161+
System.out.println(title);
162+
lines.add(title);
159163

160164
// 正文
161165
WebElement conWebElement = chromeDriver.findElement(By.id("content"));
162166
String con = conWebElement.getText();
163-
// con = con.replaceAll("<br/>", "/r/n");
164-
lines.add(con);
167+
con = con.replaceAll("<br/>", "");
168+
con = con.replaceAll("\n", "");
169+
con = con.replaceAll(",", ",");
170+
String[] conArray = con.split("。");
171+
for (String text : conArray) {
172+
if (text.length() > 50) {
173+
String[] conArray2 = text.split(",");
174+
for (int i = 0; i < conArray2.length; i++) {
175+
if (i + 2 < conArray2.length) {
176+
lines.add(conArray2[i] + "," + conArray2[++i] + "," + conArray2[++i] + ",");
177+
} else if (i + 1 < conArray2.length) {
178+
lines.add(conArray2[i] + "," + conArray2[++i] + ",");
179+
} else {
180+
lines.add(conArray2[i] + "。");
181+
}
182+
lines.add("");
183+
}
184+
lines.add("");
185+
} else {
186+
lines.add(text + "。");
187+
lines.add("");
188+
}
189+
}
165190

166191
FileUtil.appendUtf8Lines(lines, fileName);
167192

193+
Thread.sleep(10);
194+
168195
WebElement btnWebElement = chromeDriver.findElement(By.className("bottem2"));
169196
List<WebElement> btnListWebElement = btnWebElement.findElements(By.tagName("a"));
170197

0 commit comments

Comments
 (0)