1
- import md5 from 'md5'
2
- import html2markdown from 'html-to-md'
3
- import { websites , hooks } from './websites'
4
- import merge from 'webpack-merge'
5
- import 'mathjax/es5/tex-svg'
1
+ import { websites } from './websites'
6
2
import {
7
3
isExtension ,
8
- getExt ,
9
- query ,
10
- getText ,
11
- getAttribute ,
12
- queryAll ,
13
- noop ,
14
4
sendMessage ,
15
- formatDate ,
16
- insertAfter ,
17
- getUrl
18
5
} from './utils'
6
+ import { downloadMarkdown } from './markdown'
19
7
20
-
21
/**
 * Build the front-matter header that is placed in front of the article body.
 *
 * Defaults are supplied for `date`, `copyright`, `url` and `description`;
 * every other placeholder is rendered as an empty string when `data` does
 * not provide it.
 *
 * @param {Object} [data] - field values keyed by placeholder name
 *   (title, date, copyright, author, home, origin, url, tag, categories,
 *   description). Anything that is not an object is ignored.
 * @returns {string} the header, delimited by `---` lines and ending with a newline
 */
const setInfo = (data) => {
  const overrides = data instanceof Object ? data : {}
  const fields = Object.assign({
    date: formatDate('yyyy-MM-dd HH:mm:ss'),
    copyright: false,
    url: location.href,
    description: '转载',
  }, overrides)
  // The field used to be keyed by the misspelling `coypright`; keep honouring
  // it so callers written against the old key still get their value through.
  if (overrides.copyright === void 0 && overrides.coypright !== void 0) {
    fields.copyright = overrides.coypright
  }
  const template = [
    '---',
    'title: {{title}}',
    'date: {{date}}',
    'copyright: {{copyright}}',
    'author: {{author}}',
    'home: {{home}}',
    'origin: {{origin}}',
    'url: {{url}}',
    'tag: {{tag}}',
    'categories: {{categories}}',
    'description: {{description}}',
    '---',
    ''
  ].join('\n')
  return template.replace(/\{\{(.*?)\}\}/g, (placeholder, key) => fields[key] === void 0 ? '' : fields[key])
}
42
-
43
/**
 * Return the HTML that is handed to the HTML-to-markdown converter.
 *
 * The markup is passed through untouched; no tags are stripped and no
 * entities are rewritten here.
 *
 * @param {Element} markdownBody - element holding the article content
 * @returns {string} the element's `innerHTML`
 */
const getMarkdown = (markdownBody) => markdownBody.innerHTML
50
-
51
/**
 * Extract the article from the page and describe the files to download.
 *
 * Options are merged in this order: built-in defaults, `options`,
 * `customOptions` (later ones win). The entry of `hooks` registered for
 * `options.origin` may take part at four points:
 *   - `beforeExtract(context)`: when it resolves to an object, that object is
 *     returned immediately and nothing else runs;
 *   - `extract(context, { markdownBody, fileName, realName })`;
 *   - `formatContent(context, { markdownBody, markdwonDoc })`: a non-empty
 *     string result replaces the generated markdown;
 *   - `afterExtract(context)` with `context.files` set.
 *
 * @param {Object} [options] - site options; anything that is not an object is ignored
 * @param {Object} [customOptions] - overrides; anything that is not an object is ignored
 * @returns {Promise<Object>} the object produced by `beforeExtract`, or
 *   `{ type: 'download', fileName, files }` where `files` holds one
 *   `{ name, downloadUrl }` entry per image followed by `{ name, content }`
 *   entries for the markdown document and the list of image URLs
 * @throws {TypeError} when no element matches `selectors.body`
 */
const convert = async (options, customOptions) => {
  const context = {}
  const defaultOptions = {
    origin: 'juejin',
    // handle links: copy each anchor's title into its href
    link: true,
    // handle line breaks: put newlines around generated code fences
    br: false,
    // handle code blocks: turn <code> elements into fenced text
    code: false,
    lazyKey: 'data-src',
    selectors: {
      title: '.article-title',
      body: '.markdown-body',
      copyBtn: '.copy-code-btn',
      userName: '.username .name',
      userLink: '.username',
      invalid: 'style',
      unpack: ''
    }
  }
  const settings = merge(
    {},
    defaultOptions,
    options instanceof Object ? options : {},
    customOptions instanceof Object ? customOptions : {}
  )
  if (settings.context) {
    if (typeof settings.context === 'string') {
      // An HTML string is parsed into a detached element that is searched instead of the page.
      const el = document.createElement('div')
      el.innerHTML = settings.context
      settings.context = el
    } else {
      settings.context = settings.context instanceof Node ? settings.context : void 0
    }
  }
  const { origin, selectors } = settings
  const hook = hooks[origin] || {}
  // NOTE(review): `noop` presumably substitutes a do-nothing function when the hook is missing - confirm in ./utils
  const result = await noop(hook.beforeExtract)(Object.assign(context, {
    options: settings
  }))
  if (result instanceof Object) {
    return result
  }

  const bodyElement = query(selectors.body, settings.context)
  if (!bodyElement) {
    // Fail with a readable message instead of a null dereference on cloneNode.
    throw new TypeError('No element matches the body selector "' + selectors.body + '"')
  }
  // Work on a copy so the clean-up below never alters the live page.
  const markdownBody = bodyElement.cloneNode(true)
  const fileName = (getText(selectors.title) || document.title)
  // Replace characters that are not allowed in file names (the double quote included).
  const realName = fileName.replace(/[\\/?<>:'"*|]/g, '_')
  noop(hook.extract)(context, { markdownBody, fileName, realName })

  queryAll(selectors.copyBtn, markdownBody).forEach(item => item.remove())
  queryAll('[data-id]', markdownBody).forEach(item => item.removeAttribute('data-id'))
  if (selectors.invalid) {
    queryAll(selectors.invalid, markdownBody).forEach(item => item.remove())
  }
  if (selectors.unpack) {
    queryAll(selectors.unpack, markdownBody).forEach(item => {
      // Swap the wrapper for a plain span and add a line break after it.
      const span = document.createElement('span')
      span.innerHTML = item.innerHTML
      insertAfter(document.createElement('br'), item)
      item.parentElement.replaceChild(span, item)
    })
  }
  if (settings.link) {
    queryAll('a', markdownBody).forEach(item => {
      // Only overwrite when a title exists; otherwise the link would lose its target.
      if (item.title) {
        item.href = item.title
      }
    })
  }
  if (settings.code) {
    queryAll('code', markdownBody).forEach(item => {
      const br = (settings.br || /copyable/.test(item.className)) ? '\n' : ''
      // Language comes from the `lang` attribute or the part after the first '-' in the class name.
      const lang = item.getAttribute('lang') || item.className.split('-')[1] || ''
      const text = '```' + (lang ? ' ' + lang : '') + br + item.innerText + br + '```' + br
      item.parentElement.replaceChild(document.createTextNode(text), item)
    })
  }

  const urls = []
  const collectUrls = settings.urls !== false
  const files = queryAll('img', markdownBody).map(item => {
    const downloadName = item.getAttribute('downloadName')
    const downloadUrl = item.getAttribute('downloadUrl')
    if (downloadName && downloadUrl) {
      // The image already carries an explicit download target; use it as-is.
      item.src = './' + downloadName
      if (collectUrls) {
        urls.push(downloadUrl)
      }
      return {
        name: downloadName,
        downloadUrl
      }
    }
    const src = item.getAttribute(settings.lazyKey) || item.src
    const url = src.replace(/\?$/, '')
    const ext = getExt(url)
    // Store the image under the article's folder, named by the hash of its URL.
    const name = realName + '/' + md5(url) + (ext ? '.' + ext : '')
    item.src = './' + name
    if (collectUrls) {
      urls.push(url)
    }
    return {
      name,
      downloadUrl: url
    }
  })

  const info = setInfo({
    title: fileName,
    origin: origin,
    author: getText(selectors.userName),
    home: getUrl(location.origin, getAttribute('href', selectors.userLink)),
    description: markdownBody.innerText.replace(/^([\n\s]+)/g, '').replace(/\n/g, ' ').slice(0, 50) + '...',
  })
  const markdownDoc = html2markdown(info + getMarkdown(markdownBody), {})
  const copyright = '> 当前文档由 [markdown文档下载插件](https://github.com/kscript/markdown-download) 下载, 原文链接: [' + fileName + '](' + location.href + ') '
  // The payload key keeps its historical spelling (`markdwonDoc`) because hooks read it by that name.
  const content = await noop(hook.formatContent)(context, { markdownBody, markdwonDoc: markdownDoc })
  files.push({
    name: realName + '.md',
    content: (content && typeof content === 'string' ? content : markdownDoc) + '\n\n' + copyright
  })
  files.push({
    name: realName + '/urls',
    content: urls.join('\n')
  })
  noop(hook.afterExtract)(Object.assign(context, { files }))
  return {
    type: 'download',
    fileName,
    files
  }
}
167
-
168
/**
 * Run the markdown download for the current page and forward the result.
 *
 * The result of `downloadMarkdown` is passed to `sendMessage` only when it is
 * truthy, so an empty result is never sent.
 *
 * @param {Object} [options] - forwarded to `downloadMarkdown`
 * @param {Object} [customOptions] - forwarded to `downloadMarkdown`
 * @param {*} [hook] - forwarded to `downloadMarkdown`
 * @returns {Promise<*>} whatever `downloadMarkdown` resolved to
 */
const extract = async (options, customOptions, hook) => {
  const datas = await downloadMarkdown(options, customOptions, hook)
  if (datas) {
    sendMessage(datas)
  }
  return datas
}
173
13
@@ -184,4 +24,4 @@ if (isExtension) {
184
24
} )
185
25
}
186
26
187
- export default convert
27
+ export default downloadMarkdown
0 commit comments