1+ #!/usr/bin/env python3
2+
3+ import os
4+ import re
5+ import markdown
6+ import glob
7+ import html
8+
9+
def parse_databricks_notebook(filepath):
    """Parse a Databricks ``.py`` notebook export into a list of cells.

    The file content is split on ``# COMMAND ----------`` separators. Blank
    sections are skipped. A section containing ``# MAGIC %md`` becomes a
    markdown cell built only from its ``# MAGIC`` lines (prefix removed);
    every other section becomes a code cell with ``# DBTITLE`` lines dropped.

    Args:
        filepath: Path of the notebook source file to read.

    Returns:
        A list of dicts in file order. Each dict has a ``'type'`` key
        (``'markdown'`` or ``'code'``) and a ``'content'`` key with the
        cell text.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    md_marker = '# MAGIC %md'
    magic_with_space = '# MAGIC '
    magic_bare = '# MAGIC'

    # Read as UTF-8 explicitly instead of relying on the platform default.
    # NOTE(review): assumes notebook exports are UTF-8 encoded - confirm.
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    cells = []
    for section in re.split(r'# COMMAND ----------', content):
        if not section.strip():
            continue

        # Split on a bare newline so each source line is examined on its own.
        lines = section.split('\n')

        if md_marker in section:
            md_lines = []
            for line in lines:
                # Order matters: test the most specific prefix first.
                if line.startswith(md_marker):
                    # Keep any text that follows '%md' on the same line.
                    md_lines.append(line[len(md_marker):].strip())
                elif line.startswith(magic_with_space):
                    md_lines.append(line[len(magic_with_space):])
                elif line.startswith(magic_bare):
                    # A bare '# MAGIC' line represents an empty markdown line.
                    md_lines.append(line[len(magic_bare):])
                # Lines without the MAGIC prefix are not part of the markdown.

            cells.append({'type': 'markdown', 'content': '\n'.join(md_lines)})
        else:
            # Code cell: drop the '# DBTITLE' metadata lines, keep the rest.
            code_lines = [
                line for line in lines if not line.startswith('# DBTITLE')
            ]
            code_content = '\n'.join(code_lines).strip()
            if code_content:
                cells.append({'type': 'code', 'content': code_content})

    return cells
55+
56+
def convert_to_html(filepath):
    """Convert a Databricks ``.py`` notebook to a standalone HTML page.

    The notebook is parsed with ``parse_databricks_notebook``. Markdown cells
    are rendered with ``markdown.markdown`` (``fenced_code`` and ``tables``
    extensions); code cells are HTML-escaped and placed in a
    ``<pre><code class="language-python">`` element. The page is written to
    ``site/<name>.html``, where ``<name>`` is the input file name without its
    extension.

    Args:
        filepath: Path of the notebook source file to convert.

    Returns:
        The input file's base name without extension.

    Raises:
        OSError: If the notebook cannot be read or the output cannot be
            written.
    """
    filename = os.path.basename(filepath)
    name_without_ext = os.path.splitext(filename)[0]

    cells = parse_databricks_notebook(filepath)
    html_content = []

    for cell in cells:
        if cell['type'] == 'markdown':
            # Convert markdown to HTML
            md_html = markdown.markdown(
                cell['content'],
                extensions=['fenced_code', 'tables'],
            )
            html_content.append(f'''
<div class="cell border-box-sizing text_cell rendered">
  <div class="inner_cell">
    <div class="text_cell_render border-box-sizing rendered_html">
{md_html}
    </div>
  </div>
</div>
''')
        elif cell['type'] == 'code':
            # Escape the source so it is shown verbatim. No whitespace is
            # added inside <pre>, where it would be rendered.
            escaped_code = html.escape(cell['content'])
            html_content.append(f'''
<div class="cell border-box-sizing code_cell rendered">
  <div class="input">
    <div class="inner_cell">
      <div class="input_area">
<pre><code class="language-python">{escaped_code}</code></pre>
      </div>
    </div>
  </div>
</div>
''')

    # The file name is arbitrary text, so escape it before using it as title.
    page_title = html.escape(name_without_ext)
    page_body = ''.join(html_content)

    # Create full HTML document with notebook styling
    full_html = f'''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{page_title}</title>
<!-- Jupyter notebook CSS styling -->
<style>
body {{
    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
    font-size: 13px;
    line-height: 1.42857143;
    color: #000;
    background-color: #fff;
    margin: 0;
    padding: 0;
}}

.container {{
    width: 100%;
    padding: 15px;
}}

.cell {{
    margin-bottom: 15px;
    padding: 5px 0;
}}

.text_cell .inner_cell {{
    padding: 5px 5px 5px 0px;
}}

.text_cell_render {{
    outline: none;
    resize: none;
    width: inherit;
    border-style: none;
    padding: 0.5em 0.5em 0.5em 0.4em;
    color: #000;
    box-sizing: border-box;
}}

.code_cell {{
    background: #f7f7f7;
    border: none;
    border-radius: 2px;
    padding: 8px;
}}

.input_area {{
    border: 1px solid #cfcfcf;
    border-radius: 2px;
    background: #f7f7f7;
    line-height: 1.21429em;
}}

.input_area pre {{
    margin: 0;
    padding: 8px;
    font-family: Monaco, 'DejaVu Sans Mono', monospace;
    font-size: 11px;
    overflow: auto;
    color: #000;
    background-color: transparent;
    border: none;
}}

.text_cell_render h1 {{
    font-size: 1.8em;
    margin: 0.67em 0;
    color: #1F2937;
}}

.text_cell_render h2 {{
    font-size: 1.5em;
    margin: 0.83em 0;
    color: #1F2937;
}}

.text_cell_render h3 {{
    font-size: 1.3em;
    margin: 1em 0;
    color: #1F2937;
}}

.text_cell_render p {{
    margin: 1em 0;
    line-height: 1.6;
}}

.text_cell_render ul, .text_cell_render ol {{
    margin: 1em 0;
    padding-left: 2em;
}}

.text_cell_render li {{
    margin: 0.5em 0;
}}

.text_cell_render code {{
    background: #f8f9fa;
    padding: 2px 6px;
    border-radius: 4px;
    font-family: Monaco, 'DejaVu Sans Mono', monospace;
    color: #e91e63;
}}

.text_cell_render pre {{
    background: #f8f9fa;
    padding: 15px;
    border-radius: 6px;
    overflow-x: auto;
    border: 1px solid #e5e7eb;
}}

.text_cell_render blockquote {{
    border-left: 4px solid #ddd;
    padding-left: 15px;
    margin: 15px 0;
    color: #666;
}}

/* Syntax highlighting */
.language-python {{
    color: #333;
}}

.highlight {{
    background: #f8f8f8;
}}
</style>

<!-- Prism.js for syntax highlighting -->
<link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/themes/prism.min.css" rel="stylesheet" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-core.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/plugins/autoloader/prism-autoloader.min.js"></script>
</head>
<body>
<div class="container">
{page_body}
</div>
</body>
</html>'''

    # Write HTML file. Create the output directory first so a fresh checkout
    # does not fail, and write UTF-8 to match the declared <meta charset>.
    os.makedirs('site', exist_ok=True)
    output_path = f"site/{name_without_ext}.html"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(full_html)

    return name_without_ext
246+
247+
if __name__ == "__main__":
    # Script entry point: convert each .py file matched directly under
    # notebooks/ and report every conversion on stdout.
    notebook_paths = glob.glob('notebooks/*.py')
    for notebook_path in notebook_paths:
        convert_to_html(notebook_path)
        print(f"Converted {notebook_path} to HTML")
0 commit comments