Skip to content

Commit 3b27e29

Browse files
committed
TestSelenium爬虫测试
1 parent 2d2eb53 commit 3b27e29

File tree

4 files changed

+245
-6
lines changed

4 files changed

+245
-6
lines changed

TestWeb/pom.xml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,20 @@
137137
<version>10.2.3</version>
138138
</dependency>
139139

140+
<!-- selenium-java -->
141+
<dependency>
142+
<groupId>org.seleniumhq.selenium</groupId>
143+
<artifactId>selenium-java</artifactId>
144+
<version>3.4.0</version>
145+
</dependency>
146+
147+
<!-- commons-math3 -->
148+
<dependency>
149+
<groupId>org.apache.commons</groupId>
150+
<artifactId>commons-math3</artifactId>
151+
<version>3.3</version>
152+
</dependency>
153+
140154
</dependencies>
141155

142156
<build>

TestWeb/src/main/java/com/wang/util/OkHttpUtil.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.slf4j.LoggerFactory;
1111

1212
import java.io.IOException;
13+
import java.util.concurrent.TimeUnit;
1314

1415
/**
1516
* OkHttpUtil
@@ -45,14 +46,17 @@ public class OkHttpUtil {
4546
*/
4647
public static String get(String url) {
4748
String result = null;
48-
OkHttpClient client = new OkHttpClient.Builder().build();
49+
OkHttpClient client = new OkHttpClient.Builder()
50+
.connectTimeout(60, TimeUnit.SECONDS)
51+
.readTimeout(60, TimeUnit.SECONDS)
52+
.writeTimeout(60, TimeUnit.SECONDS).build();
4953
Request request = new Request.Builder().url(url).build();
50-
logger.info("请求地址:{}", url);
54+
// logger.info("请求地址:{}", url);
5155
try (Response response = client.newCall(request).execute()) {
5256
result = response.body().string();
53-
logger.info("请求地址:{},请求结果:{}", url, result);
57+
// logger.info("请求地址:{},请求结果:{}", url, result);
5458
} catch (Exception e) {
55-
logger.error("请求地址:{},请求异常:{}", url, ExceptionUtil.stacktraceToOneLineString(e));
59+
// logger.error("请求地址:{},请求异常:{}", url, ExceptionUtil.stacktraceToOneLineString(e));
5660
}
5761
return result;
5862
}
@@ -98,8 +102,7 @@ public static String postJson(String url, String json) {
98102
RequestBody requestBody = RequestBody.create(MEDIA_TYPE_JSON, json);
99103
Request request = new Request.Builder().url(url).post(requestBody).build();
100104
logger.info("请求地址:{},请求参数:{}", url, json);
101-
try {
102-
Response response = httpClient.newCall(request).execute();
105+
try (Response response = httpClient.newCall(request).execute()) {
103106
result = response.body().string();
104107
logger.info("请求地址:{},请求参数:{},请求结果:{}", url, json, result);
105108
} catch (IOException e) {

TestWeb/src/test/java/com/wang/httpclient/TestHttpClient.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.wang.httpclient;
22

3+
import com.wang.util.OkHttpUtil;
34
import org.apache.http.HttpEntity;
45
import org.apache.http.client.methods.CloseableHttpResponse;
56
import org.apache.http.client.methods.HttpGet;
@@ -75,4 +76,16 @@ public void test() {
7576

7677
}
7778
}
79+
80+
@Test
81+
public void test2() throws Exception {
82+
for (Integer i = 0; i < 40; i++) {
83+
// Thread.sleep(500L);
84+
new Thread(() -> {
85+
Long timeBegin = System.currentTimeMillis();
86+
System.out.println(Thread.currentThread().getName() + ":" + OkHttpUtil.get("http://localhost:9999/test/priview") + ":" + (System.currentTimeMillis() - timeBegin) / 1000.0D);
87+
}, "thread" + i).start();
88+
}
89+
System.in.read();
90+
}
7891
}
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
package com.wang.other;
2+
3+
import cn.hutool.core.io.FileUtil;
4+
import cn.hutool.core.util.StrUtil;
5+
import org.junit.jupiter.api.Test;
6+
import org.openqa.selenium.By;
7+
import org.openqa.selenium.Dimension;
8+
import org.openqa.selenium.WebDriver;
9+
import org.openqa.selenium.WebElement;
10+
import org.openqa.selenium.chrome.ChromeDriver;
11+
import org.openqa.selenium.chrome.ChromeOptions;
12+
13+
import java.io.BufferedOutputStream;
14+
import java.io.File;
15+
import java.io.IOException;
16+
import java.util.ArrayList;
17+
import java.util.Base64;
18+
import java.util.List;
19+
20+
/**
21+
* TestSelenium
22+
*
23+
* @author wliduo[i@dolyw.com]
24+
* @date 2021/10/29 16:54
25+
*/
26+
public class TestSelenium {
27+
28+
@Test
29+
public void test01() throws Exception {
30+
System.setProperty("webdriver.chrome.driver", "D:\\Tools\\chromedriver_win32\\chromedriver.exe");
31+
WebDriver driver = new ChromeDriver();
32+
driver.get("http://localhost:8080");
33+
// String title = driver.getTitle();
34+
// System.out.printf(title);
35+
WebElement msgWebElement = driver.findElement(By.id("msg"));
36+
String base64 = msgWebElement.getText();
37+
while (true) {
38+
base64 = msgWebElement.getText();
39+
System.out.println(base64);
40+
if (StrUtil.isNotBlank(base64)) {
41+
break;
42+
}
43+
}
44+
String[] baseStr = base64.split(",");
45+
base64ToFile(baseStr[1], "xxx.pdf", "E:\\pdf\\");
46+
47+
/*driver.get("https://www.baidu.com");
48+
driver.manage().window().setSize(new Dimension(480, 800));
49+
Thread.sleep(2000);*/
50+
51+
// driver.quit();
52+
Thread.sleep(2000);
53+
driver.get("https://www.baidu.com");
54+
55+
// driver.get("https://gitee.com/dolyw");
56+
Thread.sleep(2000);
57+
driver.quit();
58+
59+
60+
driver.close();
61+
}
62+
63+
@Test
64+
public void test02() throws Exception {
65+
System.setProperty("webdriver.chrome.driver", "D:\\Tools\\chromedriver_win32\\chromedriver.exe");
66+
ChromeOptions chromeOptions = new ChromeOptions();
67+
chromeOptions.addArguments("--headless");
68+
chromeOptions.addArguments("--no-sandbox");
69+
chromeOptions.addArguments("--disable-dev-shm-usage");
70+
/*chromeOptions.addArguments("--disable-gpu");
71+
chromeOptions.addArguments("--disable-java");
72+
chromeOptions.addArguments("--disable-plugins");
73+
chromeOptions.addArguments("--disable-images");
74+
chromeOptions.addArguments("--disable-popup-blocking");*/
75+
// chromeOptions.addArguments("--single-process");
76+
// chromeOptions.addArguments("--disable-extensions");
77+
// 禁止默认浏览器检查
78+
// chromeOptions.addArguments("no-default-browser-check");
79+
// chromeOptions.addArguments("about:histograms");
80+
// chromeOptions.addArguments("about:cache");
81+
WebDriver driver = new ChromeDriver(chromeOptions);
82+
try {
83+
driver.get("https://www.areacodelocations.info/areacodelist.html");
84+
// String title = driver.getTitle();
85+
// System.out.printf(title);
86+
WebElement webElement = driver.findElement(By.tagName("tbody"));
87+
List<WebElement> webElementList = webElement.findElements(By.tagName("tr"));
88+
for (WebElement webElementTr : webElementList) {
89+
List<WebElement> webElementTDList = webElementTr.findElements(By.tagName("td"));
90+
if (webElementTDList.size() > 0) {
91+
if (webElementTDList.get(1).getText().indexOf("Canada") >= 0) {
92+
if (!"-".equals(webElementTDList.get(2).getText())) {
93+
System.out.println(webElementTDList.get(0).getText() + ":" + webElementTDList.get(1).getText() + ":" + webElementTDList.get(2).getText());
94+
}
95+
}
96+
}
97+
}
98+
} finally {
99+
driver.quit();
100+
driver.close();
101+
}
102+
}
103+
104+
@Test
105+
public void test03() throws Exception {
106+
String fileName = "D:\\Program Files (x86)\\Reader_v1.9.3.2\\天命大反派.txt";
107+
if (!FileUtil.exist(fileName)) {
108+
File file = new File(fileName);
109+
file.createNewFile();
110+
} else {
111+
System.out.println("文件已经存在");
112+
return;
113+
}
114+
115+
if (System.getProperties().getProperty("os.name").contains("Windows")) {
116+
System.setProperty("webdriver.chrome.driver", "D:\\Tools\\chromedriver_win32\\chromedriver2.exe");
117+
}
118+
ChromeOptions chromeOptions = new ChromeOptions();
119+
chromeOptions.addArguments("--headless");
120+
chromeOptions.addArguments("--no-sandbox");
121+
chromeOptions.addArguments("--disable-dev-shm-usage");
122+
chromeOptions.addArguments("--disable-gpu");
123+
chromeOptions.addArguments("--disable-java");
124+
chromeOptions.addArguments("--disable-plugins");
125+
chromeOptions.addArguments("--disable-images");
126+
chromeOptions.addArguments("--disable-popup-blocking");
127+
chromeOptions.addArguments("--single-process");
128+
chromeOptions.addArguments("--disable-extensions");
129+
// 禁止默认浏览器检查
130+
chromeOptions.addArguments("no-default-browser-check");
131+
chromeOptions.addArguments("about:histograms");
132+
chromeOptions.addArguments("about:cache");
133+
134+
WebDriver chromeDriver = new ChromeDriver(chromeOptions);
135+
136+
TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/21404025.html", fileName);
137+
// TestSelenium.text(chromeDriver, "https://www.xs123.org/xs/33/33112/70963309.html", fileName);
138+
139+
chromeDriver.close();
140+
}
141+
142+
public static void text(WebDriver chromeDriver, String url, String fileName) {
143+
chromeDriver.get(url);
144+
WebElement boxWebElement = chromeDriver.findElement(By.className("box_con"));
145+
WebElement titleElement = boxWebElement.findElement(By.tagName("h1"));
146+
147+
// System.out.println(titleElement.getText());
148+
if (titleElement.getText().contains("950")) {
149+
return;
150+
}
151+
if ("玄幻:我!天命大反派".equals(titleElement.getText())) {
152+
return;
153+
}
154+
155+
// 标题
156+
List<String> lines = new ArrayList<>();
157+
lines.add("第" + titleElement.getText().substring(0,4) + "章 " + titleElement.getText().substring(4));
158+
lines.add("");
159+
160+
// 正文
161+
WebElement conWebElement = chromeDriver.findElement(By.id("content"));
162+
String con = conWebElement.getText();
163+
// con = con.replaceAll("<br/>", "/r/n");
164+
lines.add(con);
165+
166+
FileUtil.appendUtf8Lines(lines, fileName);
167+
168+
WebElement btnWebElement = chromeDriver.findElement(By.className("bottem2"));
169+
List<WebElement> btnListWebElement = btnWebElement.findElements(By.tagName("a"));
170+
171+
// 递归调用
172+
TestSelenium.text(chromeDriver, btnListWebElement.get(2).getAttribute("href"), fileName);
173+
}
174+
175+
public static void base64ToFile(String base64, String fileName, String savePath) {
176+
File file = null;
177+
String filePath = savePath;
178+
File dir = new File(filePath);
179+
if (!dir.exists() && !dir.isDirectory()) {
180+
dir.mkdirs();
181+
}
182+
BufferedOutputStream bos = null;
183+
java.io.FileOutputStream fos = null;
184+
try {
185+
byte[] bytes = Base64.getDecoder().decode(base64);
186+
file = new File(filePath + fileName);
187+
fos = new java.io.FileOutputStream(file);
188+
bos = new BufferedOutputStream(fos);
189+
bos.write(bytes);
190+
} catch (Exception e) {
191+
e.printStackTrace();
192+
} finally {
193+
if (bos != null) {
194+
try {
195+
bos.close();
196+
} catch (IOException e) {
197+
e.printStackTrace();
198+
}
199+
}
200+
if (fos != null) {
201+
try {
202+
fos.close();
203+
} catch (IOException e) {
204+
e.printStackTrace();
205+
}
206+
}
207+
}
208+
}
209+
}

0 commit comments

Comments
 (0)