Skip to content

Commit 6d2d074

Browse files
authored
Merge pull request #6 from databricks-industry-solutions/feature/modernize-to-2025-standards
Feature/modernize to 2025 standards
2 parents e45ebe0 + abb4d5e commit 6d2d074

File tree

2 files changed

+260
-4
lines changed

2 files changed

+260
-4
lines changed
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
#!/usr/bin/env python3
2+
3+
import os
4+
import re
5+
import markdown
6+
import glob
7+
import html
8+
9+
10+
def parse_databricks_notebook(filepath):
    """Parse a Databricks ``.py`` source notebook into a list of cells.

    The file is split on ``# COMMAND ----------`` separators. A section is
    treated as markdown when one of its lines *starts with* ``# MAGIC %md``
    (including the ``%md-sandbox`` variant); every other non-empty section is
    treated as code.

    Args:
        filepath: Path to the Databricks source-format notebook.

    Returns:
        A list of dicts, each ``{'type': 'markdown' | 'code', 'content': str}``,
        in the order the cells appear in the file.
    """
    md_sandbox_prefix = '# MAGIC %md-sandbox'
    md_prefix = '# MAGIC %md'
    magic_space_prefix = '# MAGIC '
    magic_prefix = '# MAGIC'
    notebook_header = '# Databricks notebook source'

    # Read as UTF-8 explicitly so non-ASCII markdown does not depend on the
    # platform's default locale encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    cells = []
    for section in re.split(r'# COMMAND ----------', content):
        if not section.strip():
            continue

        lines = section.split('\n')

        # Detect markdown per line rather than by substring on the whole
        # section, so a code cell that merely mentions '# MAGIC %md' inside a
        # string or comment is not misclassified.
        if any(line.startswith(md_prefix) for line in lines):
            md_lines = []
            for line in lines:
                # Longest prefix first: '%md-sandbox' must be checked before
                # '%md', otherwise '-sandbox' leaks into the rendered text.
                if line.startswith(md_sandbox_prefix):
                    md_lines.append(line[len(md_sandbox_prefix):].strip())
                elif line.startswith(md_prefix):
                    md_lines.append(line[len(md_prefix):].strip())
                elif line.startswith(magic_space_prefix):
                    md_lines.append(line[len(magic_space_prefix):])
                elif line.startswith(magic_prefix):
                    md_lines.append(line[len(magic_prefix):])
                # Lines without a MAGIC prefix (blank lines, DBTITLE, the
                # notebook header) are not part of the markdown body.

            cells.append({'type': 'markdown', 'content': '\n'.join(md_lines)})
        else:
            # Drop notebook metadata lines that are not user code: cell
            # titles and the file-level source-format header.
            code_lines = [
                line for line in lines
                if not line.startswith('# DBTITLE')
                and line.rstrip() != notebook_header
            ]
            code_content = '\n'.join(code_lines).strip()
            if code_content:
                cells.append({'type': 'code', 'content': code_content})

    return cells
55+
56+
57+
def convert_to_html(filepath):
    """Convert a Databricks ``.py`` notebook to a standalone HTML page.

    The notebook is parsed with ``parse_databricks_notebook``; markdown cells
    are rendered with the ``markdown`` package (``fenced_code`` and ``tables``
    extensions) and code cells are HTML-escaped into ``<pre><code>`` blocks.
    The result is written to ``site/<notebook name>.html``.

    Args:
        filepath: Path to the Databricks source-format notebook.

    Returns:
        The notebook file name without its extension (also the stem of the
        generated HTML file).

    Raises:
        OSError: If the notebook cannot be read or the output cannot be
            written.
    """
    filename = os.path.basename(filepath)
    name_without_ext = os.path.splitext(filename)[0]

    cells = parse_databricks_notebook(filepath)
    html_content = []

    for cell in cells:
        if cell['type'] == 'markdown':
            # Convert markdown to HTML
            md_html = markdown.markdown(
                cell['content'],
                extensions=['fenced_code', 'tables']
            )
            html_content.append(f'''
<div class="cell border-box-sizing text_cell rendered">
<div class="inner_cell">
<div class="text_cell_render border-box-sizing rendered_html">
{md_html}
</div>
</div>
</div>
''')
        elif cell['type'] == 'code':
            # Escape the source so '<', '>' and '&' in code display literally
            # instead of being interpreted as markup.
            escaped_code = html.escape(cell['content'])
            html_content.append(f'''
<div class="cell border-box-sizing code_cell rendered">
<div class="input">
<div class="inner_cell">
<div class="input_area">
<pre><code class="language-python">{escaped_code}</code></pre>
</div>
</div>
</div>
</div>
''')

    # The file name is interpolated into <title>; escape it so characters
    # such as '&' or '<' in a notebook name cannot break the markup.
    page_title = html.escape(name_without_ext)
    body_html = ''.join(html_content)

    # Create full HTML document with notebook styling
    full_html = f'''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{page_title}</title>
<!-- Jupyter notebook CSS styling -->
<style>
body {{
    font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
    font-size: 13px;
    line-height: 1.42857143;
    color: #000;
    background-color: #fff;
    margin: 0;
    padding: 0;
}}

.container {{
    width: 100%;
    padding: 15px;
}}

.cell {{
    margin-bottom: 15px;
    padding: 5px 0;
}}

.text_cell .inner_cell {{
    padding: 5px 5px 5px 0px;
}}

.text_cell_render {{
    outline: none;
    resize: none;
    width: inherit;
    border-style: none;
    padding: 0.5em 0.5em 0.5em 0.4em;
    color: #000;
    box-sizing: border-box;
}}

.code_cell {{
    background: #f7f7f7;
    border: none;
    border-radius: 2px;
    padding: 8px;
}}

.input_area {{
    border: 1px solid #cfcfcf;
    border-radius: 2px;
    background: #f7f7f7;
    line-height: 1.21429em;
}}

.input_area pre {{
    margin: 0;
    padding: 8px;
    font-family: Monaco, 'DejaVu Sans Mono', monospace;
    font-size: 11px;
    overflow: auto;
    color: #000;
    background-color: transparent;
    border: none;
}}

.text_cell_render h1 {{
    font-size: 1.8em;
    margin: 0.67em 0;
    color: #1F2937;
}}

.text_cell_render h2 {{
    font-size: 1.5em;
    margin: 0.83em 0;
    color: #1F2937;
}}

.text_cell_render h3 {{
    font-size: 1.3em;
    margin: 1em 0;
    color: #1F2937;
}}

.text_cell_render p {{
    margin: 1em 0;
    line-height: 1.6;
}}

.text_cell_render ul, .text_cell_render ol {{
    margin: 1em 0;
    padding-left: 2em;
}}

.text_cell_render li {{
    margin: 0.5em 0;
}}

.text_cell_render code {{
    background: #f8f9fa;
    padding: 2px 6px;
    border-radius: 4px;
    font-family: Monaco, 'DejaVu Sans Mono', monospace;
    color: #e91e63;
}}

.text_cell_render pre {{
    background: #f8f9fa;
    padding: 15px;
    border-radius: 6px;
    overflow-x: auto;
    border: 1px solid #e5e7eb;
}}

.text_cell_render blockquote {{
    border-left: 4px solid #ddd;
    padding-left: 15px;
    margin: 15px 0;
    color: #666;
}}

/* Syntax highlighting */
.language-python {{
    color: #333;
}}

.highlight {{
    background: #f8f8f8;
}}
</style>

<!-- Prism.js for syntax highlighting -->
<link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/themes/prism.min.css" rel="stylesheet" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-core.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/plugins/autoloader/prism-autoloader.min.js"></script>
</head>
<body>
<div class="container">
{body_html}
</div>
</body>
</html>'''

    # Write HTML file. Create the output directory if needed so the script
    # also works when run outside the workflow (which pre-creates 'site').
    output_path = f"site/{name_without_ext}.html"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # The page declares charset="utf-8", so the bytes on disk must be UTF-8
    # regardless of the platform's default encoding.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(full_html)

    return name_without_ext
246+
247+
248+
if __name__ == "__main__":
    # Script entry point: render each Databricks source notebook found
    # directly under notebooks/ and report every conversion on stdout.
    notebook_paths = glob.glob('notebooks/*.py')
    for notebook_path in notebook_paths:
        convert_to_html(notebook_path)
        print(f"Converted {notebook_path} to HTML")

.github/workflows/publish.yaml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: publish
22

33
on:
44
push:
5-
branches: [ main, preview ]
5+
branches: [ main, preview, feature/modernize-to-2025-standards ]
66
workflow_dispatch:
77

88
permissions:
@@ -39,13 +39,17 @@ jobs:
3939
run: |
4040
mkdir -p site
4141
42-
# Convert notebooks to HTML with code visible
42+
# Convert .ipynb notebooks to HTML with code visible
4343
for notebook in notebooks/*.ipynb; do
4444
if [ -f "$notebook" ]; then
4545
jupyter nbconvert "$notebook" --to html --output-dir site --template classic --HTMLExporter.theme=light
4646
fi
4747
done
4848
49+
# Convert .py Databricks notebooks to HTML
50+
chmod +x .github/scripts/convert_notebooks.py
51+
python3 .github/scripts/convert_notebooks.py
52+
4953
# Create simple index page
5054
python3 << 'EOF'
5155
import os
@@ -61,11 +65,11 @@ jobs:
6165
repo_name = os.environ.get('GITHUB_REPOSITORY', '').split('/')[-1]
6266
title = ' '.join(word.capitalize() for word in repo_name.split('-')) + ' Accelerator'
6367
64-
# Find notebook files
68+
# Find notebook files (.py and .ipynb)
6569
notebook_files = []
6670
if os.path.exists('site'):
6771
for f in os.listdir('site'):
68-
if f.endswith('.html'):
72+
if f.endswith('.html') and f != 'index.html':
6973
notebook_files.append(f[:-5]) # Remove .html
7074
7175
# Create index.html

0 commit comments

Comments
 (0)