Skip to content

Commit db7f3a5

Browse files
committed
Add the arguments PrintQuerysInAllResult and PrintTextInAllResult
PrintQuerysInAllResult: Enable printing of query conditions in results. PrintTextInAllResult: Enter a text content for printing in all results. If the content contains "#result{serial number}", the content of the specified serial number's result will be automatically printed in all results following that serial number.
1 parent 8cbf736 commit db7f3a5

File tree

2 files changed

+97
-67
lines changed

2 files changed

+97
-67
lines changed

README.md

Lines changed: 69 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
htmlq v1.0.3
1+
htmlq v1.0.4
22
====
33

44
`htmlq` is a `command-line` tool that allows you to query HTML using `CSS selectors` or `XPATH` and retrieve the corresponding `text content` (similar to JavaScript's `document.querySelector(query).textContent`).
@@ -8,10 +8,11 @@ Usage
88
====
99

1010
```
11-
usage: htmlq 1.0.3 [-h|--help] [-f|--file "<value>"] [-t|--text "<value>"]
11+
usage: htmlq 1.0.4 [-h|--help] [-f|--file "<value>"] [-t|--text "<value>"]
1212
[-u|--url "<value>"] [-x|--XPATH] [-a|--SelectorAll]
1313
[-r|--ResultAsNode] [-l|--PrintLastResult]
14-
[--PrintLastResultTemp] [-H|--headers "<value>"
14+
[--PrintLastResultTemp] [--PrintQuerysInAllResult]
15+
[--PrintTextInAllResult "<value>"] [-H|--headers "<value>"
1516
[-H|--headers "<value>" ...]] [-v|--verbose] -q|--querys
1617
"<value>" [-q|--querys "<value>" ...]
1718
@@ -22,66 +23,71 @@ usage: htmlq 1.0.3 [-h|--help] [-f|--file "<value>"] [-t|--text "<value>"]
2223
2324
Arguments:
2425
25-
-h --help Print help information
26-
-f --file Enter the relative or absolute path of the HTML
27-
file
28-
-t --text Enter the HTML text content
29-
-u --url Enter the URL of the HTML
30-
31-
-x --XPATH Enable default XPATH query syntax instead of CSS
32-
Selectors. Default: false
33-
-a --SelectorAll Enable the SelectorAll mechanism. When there are
34-
multiple results for a single query, it will
35-
return all the results (similar to
36-
querySelectorAll). By default, it is not enabled,
37-
and only a single result is returned (similar to
38-
querySelector).. Default: false
39-
-r --ResultAsNode Enable using the Node from the previous query
40-
result as the current query's root Node. Default:
41-
false
42-
-l --PrintLastResult Enable printing the content of the last result in
43-
the output when using the "#lastresult" syntax in
44-
the query. Default: false
45-
--PrintLastResultTemp Enable printing the temporary content of the
46-
source data as last result in the output, when
47-
using the "#lastresult" syntax in query. Default:
48-
false
49-
50-
-H --headers When the query item is a table or multiple td
51-
fields, you can enter corresponding names for each
52-
individual field in a single query using the
53-
format "#{serial
54-
number}:header1Name;header2Name;header3Name;...",
55-
where the serial number represents the Nth query
56-
starting from zero.
57-
-v --verbose verbose
58-
-q --querys Enter a query value to retrieve the Node Text of
59-
the target HTML. The default query method is CSS
60-
Selectors, but it can be changed to XPATH using
61-
"-x" or "--XPATH". Both query methods can be
62-
mixed. When a query starts with "C:", it
63-
represents CSS Selectors, and when it starts with
64-
"X:", it represents XPATH.
65-
66-
You can enter multiple query values separated by spaces and return all the
67-
results.
68-
69-
* When the "--ResultNode" is enabled, you can use the following special query
70-
values to change the current position of the HTML
71-
Node:
72-
- Parent: Move the Node up one level
73-
- NextSibling: Move the Node to the next sibling in the same level
74-
- PrevSibling: Move the Node to the previous sibling in the same level
75-
- FirstChild: Move the Node to the first child in the same level
76-
- LastChild: Move the Node to the last child in the same level
77-
- reset: Restore the Node to the root node of the original input
78-
79-
* When using the "#lastresult" query syntax, the text content of the previous
80-
query will automatically replace
81-
"#lastresult".
82-
83-
Example: --ResultNode -q Query1 Parent Query2 NextSibling Query3 LastChild
84-
C:Query4 reset X:Query5
26+
-h --help Print help information
27+
-f --file Enter the relative or absolute path of the HTML
28+
file
29+
-t --text Enter the HTML text content
30+
-u --url Enter the URL of the HTML
31+
-x --XPATH Enable default XPATH query syntax instead of
32+
CSS Selectors. Default: false
33+
-a --SelectorAll Enable the SelectorAll mechanism. When there
34+
are multiple results for a single query, it
35+
will return all the results (similar to
36+
querySelectorAll). By default, it is not
37+
enabled, and only a single result is returned
38+
(similar to querySelector). Default: false
39+
-r --ResultAsNode Enable using the Node from the previous query
40+
result as the current query's root Node.
41+
Default: false
42+
-l --PrintLastResult Enable printing the content of the last result
43+
in the output when using the "#lastresult"
44+
syntax in the query. Default: false
45+
--PrintLastResultTemp Enable printing the temporary content of the
46+
source data as last result in the output, when
47+
using the "#lastresult" syntax in query.
48+
Default: false
49+
--PrintQuerysInAllResult Enable printing of query conditions in
50+
results. Default: false
51+
--PrintTextInAllResult Enter a text content for printing in all
52+
results. If the content contains "#result{serial
53+
number}", the content of the specified serial
54+
number's result will be automatically printed
55+
in all results following that serial number.
56+
-H --headers When the query item is a table or multiple td
57+
fields, you can enter corresponding names for
58+
each individual field in a single query using
59+
the format "#{serial
60+
number}:header1Name;header2Name;header3Name;...",
61+
where the serial number represents the Nth
62+
query starting from zero.
63+
-v --verbose verbose
64+
-q --querys Enter a query value to retrieve the Node Text
65+
of the target HTML. The default query method is
66+
CSS Selectors, but it can be changed to XPATH
67+
using "-x" or "--XPATH". Both query methods can
68+
be mixed. When a query starts with "C:", it
69+
represents CSS Selectors, and when it starts
70+
with "X:", it represents XPATH.
71+
72+
You can enter multiple query values separated by spaces and return all the
73+
results.
74+
75+
* When the "--ResultAsNode" is enabled, you can use the following special query
76+
values to change the current position of the
77+
HTML Node:
78+
- Parent: Move the Node up one level
79+
- NextSibling: Move the Node to the next sibling in the same level
80+
- PrevSibling: Move the Node to the previous sibling in the same level
81+
- FirstChild: Move the Node to the first child in the same level
82+
- LastChild: Move the Node to the last child in the same level
83+
- reset: Restore the Node to the root node of the original input
84+
85+
* When using the "#lastresult" query syntax, the text content of the previous
86+
query will automatically replace
87+
"#lastresult".
88+
89+
Example: --ResultAsNode -q Query1 Parent Query2 NextSibling Query3 LastChild
90+
C:Query4 reset X:Query5
8591
```
8692

8793
Examples

main.go

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ var (
2222
results [][]interface{}
2323
headerMap = make(map[int]interface{})
2424
// Create new parser object
25-
parser = argparse.NewParser("htmlq 1.0.3", "A command-line tool that allows you to query HTML using CSS selectors or XPATH and retrieve the corresponding text content (similar to JavaScript's `document.querySelector(query).textContent`)")
25+
parser = argparse.NewParser("htmlq 1.0.4", "A command-line tool that allows you to query HTML using CSS selectors or XPATH and retrieve the corresponding text content (similar to JavaScript's `document.querySelector(query).textContent`)")
2626

2727
// Create filePath flag
2828
filePath = parser.String("f", "file", &argparse.Options{Help: `Enter the relative or absolute path of the HTML file`})
@@ -41,6 +41,10 @@ var (
4141
isPrintLastResult = parser.Flag("l", "PrintLastResult", &argparse.Options{Help: `Enable printing the content of the last result in the output when using the "#lastresult" syntax in the query`, Default: false})
4242
// Create isPrintLastResultTemp flag
4343
isPrintLastResultTemp = parser.Flag("", "PrintLastResultTemp", &argparse.Options{Help: `Enable printing the temporary content of the source data as last result in the output, when using the "#lastresult" syntax in query`, Default: false})
44+
// Create isPrintQuerysInAllResult flag
45+
isPrintQuerysInAllResult = parser.Flag("", "PrintQuerysInAllResult", &argparse.Options{Help: `Enable printing of query conditions in results.`, Default: false})
46+
// Create printTextInAllResult flag
47+
printTextInAllResult = parser.String("", "PrintTextInAllResult", &argparse.Options{Help: `Enter a text content for printing in all results. If the content contains "#{serial number}", the content of the specified serial number's result will be automatically printed in all results following that serial number.`})
4448

4549
// Create headers flag
4650
headers = parser.StringList("H", "headers", &argparse.Options{Help: `When the query item is a table or multiple td fields, you can enter corresponding names for each individual field in a single query using the format "#{serial number}:header1Name;header2Name;header3Name;...", where the serial number represents the Nth query starting from zero.`})
@@ -153,6 +157,13 @@ func main() {
153157
headerMap[queryID] = heads
154158
}
155159

160+
var printText string = ""
161+
var printTextResultID *int = new(int)
162+
printTextMatchs := regexp.MustCompile(`\#result(\d+)`).FindStringSubmatch(*printTextInAllResult)
163+
if (len(printTextMatchs)) == 2 {
164+
*printTextResultID, _ = strconv.Atoi(printTextMatchs[1])
165+
}
166+
156167
currDoc := htmlDoc
157168
var selected []*html.Node = nil
158169
for qIdx, qVal := range *querys {
@@ -226,17 +237,30 @@ func main() {
226237
info := fmt.Sprint(len(results), ": ")
227238
info += qVal
228239
fmt.Println()
229-
color.HiGreen(info) //fmt.Println(info)
240+
color.HiGreen(info)
241+
}
242+
243+
if strings.Contains(*printTextInAllResult, "#result") && printTextResultID != nil && len(results) > *printTextResultID {
244+
printText = fmt.Sprintf("%q", results[*printTextResultID])
245+
printText = strings.NewReplacer(`" "`, ";", "[", "", "]", "", `"`, "").Replace(printText)
246+
*printTextInAllResult = strings.ReplaceAll(*printTextInAllResult, fmt.Sprintf("#result%v", *printTextResultID), printText)
230247
}
248+
231249
for idx, text := range result {
232250
if *verboses > 0 {
233-
color.HiCyan("%d-%d:\n", len(results), idx) //fmt.Printf("%d-%d:\n", len(results), idx)
251+
color.HiCyan("%d-%d:\n", len(results), idx)
252+
if *isPrintQuerysInAllResult {
253+
color.HiMagenta(qVal)
254+
}
255+
if len(*printTextInAllResult) > 0 && !strings.Contains(*printTextInAllResult, "#result") {
256+
color.HiYellow(*printTextInAllResult)
257+
}
234258
if *isPrintLastResult && strings.Contains(qVal, "#lastresult") && len(results)-1 >= 0 {
235259
fmt.Printf("%s\n", results[len(results)-1][idx])
236260
}
237261
}
238262

239-
color.HiBlue("%s", text) //fmt.Println(text)
263+
color.HiBlue("%s", text)
240264
}
241265
}
242266
fmt.Println()

0 commit comments

Comments
 (0)