From f5144d0d1e2fd955572d6ba9f55fff65462239d4 Mon Sep 17 00:00:00 2001 From: Altay Sansal Date: Tue, 29 Apr 2025 10:17:11 -0500 Subject: [PATCH 01/55] update project metadata and deps --- uv.lock | 333 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 297 insertions(+), 36 deletions(-) diff --git a/uv.lock b/uv.lock index de95c4e6..c2ddbc3c 100644 --- a/uv.lock +++ b/uv.lock @@ -279,6 +279,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537 }, ] +[[package]] +name = "bandit" +version = "1.8.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "pyyaml" }, + { name = "rich" }, + { name = "stevedore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1a/a5/144a45f8e67df9d66c3bc3f7e69a39537db8bff1189ab7cff4e9459215da/bandit-1.8.3.tar.gz", hash = "sha256:f5847beb654d309422985c36644649924e0ea4425c76dec2e89110b87506193a", size = 4232005 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/85/db74b9233e0aa27ec96891045c5e920a64dd5cbccd50f8e64e9460f48d35/bandit-1.8.3-py3-none-any.whl", hash = "sha256:28f04dc0d258e1dd0f99dee8eefa13d1cb5e3fde1a5ab0c523971f97b289bcd8", size = 129078 }, +] + [[package]] name = "beautifulsoup4" version = "4.13.4" @@ -289,7 +304,35 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067 } wheels = [ - { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = 
"sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285 }, + { url = "https://files.pythonhosted.org/packages/f9/49/6abb616eb3cbab6a7cca303dc02fdf3836de2e0b834bf966a7f5271a34d8/beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16", size = 186015 }, +] + +[[package]] +name = "black" +version = "24.10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "mypy-extensions" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d8/0d/cc2fb42b8c50d80143221515dd7e4766995bd07c56c9a3ed30baf080b6dc/black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875", size = 645813 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/cc/7496bb63a9b06a954d3d0ac9fe7a73f3bf1cd92d7a58877c27f4ad1e9d41/black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad", size = 1607468 }, + { url = "https://files.pythonhosted.org/packages/2b/e3/69a738fb5ba18b5422f50b4f143544c664d7da40f09c13969b2fd52900e0/black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50", size = 1437270 }, + { url = "https://files.pythonhosted.org/packages/c9/9b/2db8045b45844665c720dcfe292fdaf2e49825810c0103e1191515fc101a/black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392", size = 1737061 }, + { url = "https://files.pythonhosted.org/packages/a3/95/17d4a09a5be5f8c65aa4a361444d95edc45def0de887810f508d3f65db7a/black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175", size = 1423293 }, + { url = 
"https://files.pythonhosted.org/packages/90/04/bf74c71f592bcd761610bbf67e23e6a3cff824780761f536512437f1e655/black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3", size = 1644256 }, + { url = "https://files.pythonhosted.org/packages/4c/ea/a77bab4cf1887f4b2e0bce5516ea0b3ff7d04ba96af21d65024629afedb6/black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65", size = 1448534 }, + { url = "https://files.pythonhosted.org/packages/4e/3e/443ef8bc1fbda78e61f79157f303893f3fddf19ca3c8989b163eb3469a12/black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f", size = 1761892 }, + { url = "https://files.pythonhosted.org/packages/52/93/eac95ff229049a6901bc84fec6908a5124b8a0b7c26ea766b3b8a5debd22/black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8", size = 1434796 }, + { url = "https://files.pythonhosted.org/packages/d0/a0/a993f58d4ecfba035e61fca4e9f64a2ecae838fc9f33ab798c62173ed75c/black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981", size = 1643986 }, + { url = "https://files.pythonhosted.org/packages/37/d5/602d0ef5dfcace3fb4f79c436762f130abd9ee8d950fa2abdbf8bbc555e0/black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b", size = 1448085 }, + { url = "https://files.pythonhosted.org/packages/47/6d/a3a239e938960df1a662b93d6230d4f3e9b4a22982d060fc38c42f45a56b/black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2", size = 1760928 }, + { url = 
"https://files.pythonhosted.org/packages/dd/cf/af018e13b0eddfb434df4d9cd1b2b7892bab119f7a20123e93f6910982e8/black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b", size = 1436875 }, + { url = "https://files.pythonhosted.org/packages/8d/a7/4b27c50537ebca8bec139b872861f9d2bf501c5ec51fcf897cb924d9e264/black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d", size = 206898 }, ] [[package]] @@ -677,36 +720,43 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/13/1f/9fa001e74a1993a9cadd2333bb889e50c66327b8594ac538ab8a04f915b7/cryptography-45.0.3.tar.gz", hash = "sha256:ec21313dd335c51d7877baf2972569f40a4291b76a0ce51391523ae358d05899", size = 744738 } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/b2/2345dc595998caa6f68adf84e8f8b50d18e9fc4638d32b22ea8daedd4b7a/cryptography-45.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:7573d9eebaeceeb55285205dbbb8753ac1e962af3d9640791d12b36864065e71", size = 7056239 }, - { url = "https://files.pythonhosted.org/packages/71/3d/ac361649a0bfffc105e2298b720d8b862330a767dab27c06adc2ddbef96a/cryptography-45.0.3-cp311-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d377dde61c5d67eb4311eace661c3efda46c62113ff56bf05e2d679e02aebb5b", size = 4205541 }, - { url = "https://files.pythonhosted.org/packages/70/3e/c02a043750494d5c445f769e9c9f67e550d65060e0bfce52d91c1362693d/cryptography-45.0.3-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fae1e637f527750811588e4582988932c222f8251f7b7ea93739acb624e1487f", size = 4433275 }, - { url = "https://files.pythonhosted.org/packages/40/7a/9af0bfd48784e80eef3eb6fd6fde96fe706b4fc156751ce1b2b965dada70/cryptography-45.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ca932e11218bcc9ef812aa497cdf669484870ecbcf2d99b765d6c27a86000942", size = 4209173 }, - { url = 
"https://files.pythonhosted.org/packages/31/5f/d6f8753c8708912df52e67969e80ef70b8e8897306cd9eb8b98201f8c184/cryptography-45.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:af3f92b1dc25621f5fad065288a44ac790c5798e986a34d393ab27d2b27fcff9", size = 3898150 }, - { url = "https://files.pythonhosted.org/packages/8b/50/f256ab79c671fb066e47336706dc398c3b1e125f952e07d54ce82cf4011a/cryptography-45.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2f8f8f0b73b885ddd7f3d8c2b2234a7d3ba49002b0223f58cfde1bedd9563c56", size = 4466473 }, - { url = "https://files.pythonhosted.org/packages/62/e7/312428336bb2df0848d0768ab5a062e11a32d18139447a76dfc19ada8eed/cryptography-45.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9cc80ce69032ffa528b5e16d217fa4d8d4bb7d6ba8659c1b4d74a1b0f4235fca", size = 4211890 }, - { url = "https://files.pythonhosted.org/packages/e7/53/8a130e22c1e432b3c14896ec5eb7ac01fb53c6737e1d705df7e0efb647c6/cryptography-45.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c824c9281cb628015bfc3c59335163d4ca0540d49de4582d6c2637312907e4b1", size = 4466300 }, - { url = "https://files.pythonhosted.org/packages/ba/75/6bb6579688ef805fd16a053005fce93944cdade465fc92ef32bbc5c40681/cryptography-45.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:5833bb4355cb377ebd880457663a972cd044e7f49585aee39245c0d592904578", size = 4332483 }, - { url = "https://files.pythonhosted.org/packages/2f/11/2538f4e1ce05c6c4f81f43c1ef2bd6de7ae5e24ee284460ff6c77e42ca77/cryptography-45.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9bb5bf55dcb69f7067d80354d0a348368da907345a2c448b0babc4215ccd3497", size = 4573714 }, - { url = "https://files.pythonhosted.org/packages/f5/bb/e86e9cf07f73a98d84a4084e8fd420b0e82330a901d9cac8149f994c3417/cryptography-45.0.3-cp311-abi3-win32.whl", hash = "sha256:3ad69eeb92a9de9421e1f6685e85a10fbcfb75c833b42cc9bc2ba9fb00da4710", size = 2934752 }, - { url = 
"https://files.pythonhosted.org/packages/c7/75/063bc9ddc3d1c73e959054f1fc091b79572e716ef74d6caaa56e945b4af9/cryptography-45.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:97787952246a77d77934d41b62fb1b6f3581d83f71b44796a4158d93b8f5c490", size = 3412465 }, - { url = "https://files.pythonhosted.org/packages/71/9b/04ead6015229a9396890d7654ee35ef630860fb42dc9ff9ec27f72157952/cryptography-45.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:c92519d242703b675ccefd0f0562eb45e74d438e001f8ab52d628e885751fb06", size = 7031892 }, - { url = "https://files.pythonhosted.org/packages/46/c7/c7d05d0e133a09fc677b8a87953815c522697bdf025e5cac13ba419e7240/cryptography-45.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5edcb90da1843df85292ef3a313513766a78fbbb83f584a5a58fb001a5a9d57", size = 4196181 }, - { url = "https://files.pythonhosted.org/packages/08/7a/6ad3aa796b18a683657cef930a986fac0045417e2dc428fd336cfc45ba52/cryptography-45.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38deed72285c7ed699864f964a3f4cf11ab3fb38e8d39cfcd96710cd2b5bb716", size = 4423370 }, - { url = "https://files.pythonhosted.org/packages/4f/58/ec1461bfcb393525f597ac6a10a63938d18775b7803324072974b41a926b/cryptography-45.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5555365a50efe1f486eed6ac7062c33b97ccef409f5970a0b6f205a7cfab59c8", size = 4197839 }, - { url = "https://files.pythonhosted.org/packages/d4/3d/5185b117c32ad4f40846f579369a80e710d6146c2baa8ce09d01612750db/cryptography-45.0.3-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9e4253ed8f5948a3589b3caee7ad9a5bf218ffd16869c516535325fece163dcc", size = 3886324 }, - { url = "https://files.pythonhosted.org/packages/67/85/caba91a57d291a2ad46e74016d1f83ac294f08128b26e2a81e9b4f2d2555/cryptography-45.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cfd84777b4b6684955ce86156cfb5e08d75e80dc2585e10d69e47f014f0a5342", size = 4450447 }, - { url = 
"https://files.pythonhosted.org/packages/ae/d1/164e3c9d559133a38279215c712b8ba38e77735d3412f37711b9f8f6f7e0/cryptography-45.0.3-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:a2b56de3417fd5f48773ad8e91abaa700b678dc7fe1e0c757e1ae340779acf7b", size = 4200576 }, - { url = "https://files.pythonhosted.org/packages/71/7a/e002d5ce624ed46dfc32abe1deff32190f3ac47ede911789ee936f5a4255/cryptography-45.0.3-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:57a6500d459e8035e813bd8b51b671977fb149a8c95ed814989da682314d0782", size = 4450308 }, - { url = "https://files.pythonhosted.org/packages/87/ad/3fbff9c28cf09b0a71e98af57d74f3662dea4a174b12acc493de00ea3f28/cryptography-45.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f22af3c78abfbc7cbcdf2c55d23c3e022e1a462ee2481011d518c7fb9c9f3d65", size = 4325125 }, - { url = "https://files.pythonhosted.org/packages/f5/b4/51417d0cc01802304c1984d76e9592f15e4801abd44ef7ba657060520bf0/cryptography-45.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:232954730c362638544758a8160c4ee1b832dc011d2c41a306ad8f7cccc5bb0b", size = 4560038 }, - { url = "https://files.pythonhosted.org/packages/80/38/d572f6482d45789a7202fb87d052deb7a7b136bf17473ebff33536727a2c/cryptography-45.0.3-cp37-abi3-win32.whl", hash = "sha256:cb6ab89421bc90e0422aca911c69044c2912fc3debb19bb3c1bfe28ee3dff6ab", size = 2924070 }, - { url = "https://files.pythonhosted.org/packages/91/5a/61f39c0ff4443651cc64e626fa97ad3099249152039952be8f344d6b0c86/cryptography-45.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:d54ae41e6bd70ea23707843021c778f151ca258081586f0cfa31d936ae43d1b2", size = 3395005 }, - { url = "https://files.pythonhosted.org/packages/e7/d4/58a246342093a66af8935d6aa59f790cbb4731adae3937b538d054bdc2f9/cryptography-45.0.3-pp311-pypy311_pp73-macosx_10_9_x86_64.whl", hash = "sha256:edd6d51869beb7f0d472e902ef231a9b7689508e83880ea16ca3311a00bf5ce7", size = 3589802 }, - { url = 
"https://files.pythonhosted.org/packages/96/61/751ebea58c87b5be533c429f01996050a72c7283b59eee250275746632ea/cryptography-45.0.3-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:555e5e2d3a53b4fabeca32835878b2818b3f23966a4efb0d566689777c5a12c8", size = 4146964 }, - { url = "https://files.pythonhosted.org/packages/8d/01/28c90601b199964de383da0b740b5156f5d71a1da25e7194fdf793d373ef/cryptography-45.0.3-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:25286aacb947286620a31f78f2ed1a32cded7be5d8b729ba3fb2c988457639e4", size = 4388103 }, - { url = "https://files.pythonhosted.org/packages/3d/ec/cd892180b9e42897446ef35c62442f5b8b039c3d63a05f618aa87ec9ebb5/cryptography-45.0.3-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:050ce5209d5072472971e6efbfc8ec5a8f9a841de5a4db0ebd9c2e392cb81972", size = 4150031 }, - { url = "https://files.pythonhosted.org/packages/db/d4/22628c2dedd99289960a682439c6d3aa248dff5215123ead94ac2d82f3f5/cryptography-45.0.3-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:dc10ec1e9f21f33420cc05214989544727e776286c1c16697178978327b95c9c", size = 4387389 }, - { url = "https://files.pythonhosted.org/packages/39/ec/ba3961abbf8ecb79a3586a4ff0ee08c9d7a9938b4312fb2ae9b63f48a8ba/cryptography-45.0.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:9eda14f049d7f09c2e8fb411dda17dd6b16a3c76a1de5e249188a32aeb92de19", size = 3337432 }, + { url = "https://files.pythonhosted.org/packages/92/ef/83e632cfa801b221570c5f58c0369db6fa6cef7d9ff859feab1aae1a8a0f/cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7", size = 6676361 }, + { url = "https://files.pythonhosted.org/packages/30/ec/7ea7c1e4c8fc8329506b46c6c4a52e2f20318425d48e0fe597977c71dbce/cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1", size = 3952350 }, + { url = 
"https://files.pythonhosted.org/packages/27/61/72e3afdb3c5ac510330feba4fc1faa0fe62e070592d6ad00c40bb69165e5/cryptography-44.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb", size = 4166572 }, + { url = "https://files.pythonhosted.org/packages/26/e4/ba680f0b35ed4a07d87f9e98f3ebccb05091f3bf6b5a478b943253b3bbd5/cryptography-44.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843", size = 3958124 }, + { url = "https://files.pythonhosted.org/packages/9c/e8/44ae3e68c8b6d1cbc59040288056df2ad7f7f03bbcaca6b503c737ab8e73/cryptography-44.0.2-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5", size = 3678122 }, + { url = "https://files.pythonhosted.org/packages/27/7b/664ea5e0d1eab511a10e480baf1c5d3e681c7d91718f60e149cec09edf01/cryptography-44.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c", size = 4191831 }, + { url = "https://files.pythonhosted.org/packages/2a/07/79554a9c40eb11345e1861f46f845fa71c9e25bf66d132e123d9feb8e7f9/cryptography-44.0.2-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a", size = 3960583 }, + { url = "https://files.pythonhosted.org/packages/bb/6d/858e356a49a4f0b591bd6789d821427de18432212e137290b6d8a817e9bf/cryptography-44.0.2-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308", size = 4191753 }, + { url = "https://files.pythonhosted.org/packages/b2/80/62df41ba4916067fa6b125aa8c14d7e9181773f0d5d0bd4dcef580d8b7c6/cryptography-44.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688", size = 4079550 }, + { url = 
"https://files.pythonhosted.org/packages/f3/cd/2558cc08f7b1bb40683f99ff4327f8dcfc7de3affc669e9065e14824511b/cryptography-44.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7", size = 4298367 }, + { url = "https://files.pythonhosted.org/packages/71/59/94ccc74788945bc3bd4cf355d19867e8057ff5fdbcac781b1ff95b700fb1/cryptography-44.0.2-cp37-abi3-win32.whl", hash = "sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79", size = 2772843 }, + { url = "https://files.pythonhosted.org/packages/ca/2c/0d0bbaf61ba05acb32f0841853cfa33ebb7a9ab3d9ed8bb004bd39f2da6a/cryptography-44.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa", size = 3209057 }, + { url = "https://files.pythonhosted.org/packages/9e/be/7a26142e6d0f7683d8a382dd963745e65db895a79a280a30525ec92be890/cryptography-44.0.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3", size = 6677789 }, + { url = "https://files.pythonhosted.org/packages/06/88/638865be7198a84a7713950b1db7343391c6066a20e614f8fa286eb178ed/cryptography-44.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639", size = 3951919 }, + { url = "https://files.pythonhosted.org/packages/d7/fc/99fe639bcdf58561dfad1faa8a7369d1dc13f20acd78371bb97a01613585/cryptography-44.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd", size = 4167812 }, + { url = "https://files.pythonhosted.org/packages/53/7b/aafe60210ec93d5d7f552592a28192e51d3c6b6be449e7fd0a91399b5d07/cryptography-44.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181", size = 3958571 }, + { url = 
"https://files.pythonhosted.org/packages/16/32/051f7ce79ad5a6ef5e26a92b37f172ee2d6e1cce09931646eef8de1e9827/cryptography-44.0.2-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea", size = 3679832 }, + { url = "https://files.pythonhosted.org/packages/78/2b/999b2a1e1ba2206f2d3bca267d68f350beb2b048a41ea827e08ce7260098/cryptography-44.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699", size = 4193719 }, + { url = "https://files.pythonhosted.org/packages/72/97/430e56e39a1356e8e8f10f723211a0e256e11895ef1a135f30d7d40f2540/cryptography-44.0.2-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9", size = 3960852 }, + { url = "https://files.pythonhosted.org/packages/89/33/c1cf182c152e1d262cac56850939530c05ca6c8d149aa0dcee490b417e99/cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23", size = 4193906 }, + { url = "https://files.pythonhosted.org/packages/e1/99/87cf26d4f125380dc674233971069bc28d19b07f7755b29861570e513650/cryptography-44.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922", size = 4081572 }, + { url = "https://files.pythonhosted.org/packages/b3/9f/6a3e0391957cc0c5f84aef9fbdd763035f2b52e998a53f99345e3ac69312/cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4", size = 4298631 }, + { url = "https://files.pythonhosted.org/packages/e2/a5/5bc097adb4b6d22a24dea53c51f37e480aaec3465285c253098642696423/cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5", size = 2773792 }, + { url = 
"https://files.pythonhosted.org/packages/33/cf/1f7649b8b9a3543e042d3f348e398a061923ac05b507f3f4d95f11938aa9/cryptography-44.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6", size = 3210957 }, + { url = "https://files.pythonhosted.org/packages/d6/d7/f30e75a6aa7d0f65031886fa4a1485c2fbfe25a1896953920f6a9cfe2d3b/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:909c97ab43a9c0c0b0ada7a1281430e4e5ec0458e6d9244c0e821bbf152f061d", size = 3887513 }, + { url = "https://files.pythonhosted.org/packages/9c/b4/7a494ce1032323ca9db9a3661894c66e0d7142ad2079a4249303402d8c71/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:96e7a5e9d6e71f9f4fca8eebfd603f8e86c5225bb18eb621b2c1e50b290a9471", size = 4107432 }, + { url = "https://files.pythonhosted.org/packages/45/f8/6b3ec0bc56123b344a8d2b3264a325646d2dcdbdd9848b5e6f3d37db90b3/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d1b3031093a366ac767b3feb8bcddb596671b3aaff82d4050f984da0c248b615", size = 3891421 }, + { url = "https://files.pythonhosted.org/packages/57/ff/f3b4b2d007c2a646b0f69440ab06224f9cf37a977a72cdb7b50632174e8a/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:04abd71114848aa25edb28e225ab5f268096f44cf0127f3d36975bdf1bdf3390", size = 4107081 }, +] + +[[package]] +name = "darglint" +version = "1.8.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d4/2c/86e8549e349388c18ca8a4ff8661bb5347da550f598656d32a98eaaf91cc/darglint-1.8.1.tar.gz", hash = "sha256:080d5106df149b199822e7ee7deb9c012b49891538f14a11be681044f0bb20da", size = 74435 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/28/85d1e0396d64422c5218d68e5cdcc53153aa8a2c83c7dbc3ee1502adf3a1/darglint-1.8.1-py3-none-any.whl", hash = "sha256:5ae11c259c17b0701618a20c3da343a3eb98b3bc4b5a83d31cdd94f5ebdced8d", size 
= 120767 }, ] [[package]] @@ -865,6 +915,73 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, ] +[[package]] +name = "flake8" +version = "7.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mccabe" }, + { name = "pycodestyle" }, + { name = "pyflakes" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/c4/5842fc9fc94584c455543540af62fd9900faade32511fab650e9891ec225/flake8-7.2.0.tar.gz", hash = "sha256:fa558ae3f6f7dbf2b4f22663e5343b6b6023620461f8d4ff2019ef4b5ee70426", size = 48177 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/83/5c/0627be4c9976d56b1217cb5187b7504e7fd7d3503f8bfd312a04077bd4f7/flake8-7.2.0-py2.py3-none-any.whl", hash = "sha256:93b92ba5bdb60754a6da14fa3b93a9361fd00a59632ada61fd7b130436c40343", size = 57786 }, +] + +[[package]] +name = "flake8-bandit" +version = "4.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bandit" }, + { name = "flake8" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/1c/4f66a7a52a246d6c64312b5c40da3af3630cd60b27af81b137796af3c0bc/flake8_bandit-4.1.1.tar.gz", hash = "sha256:068e09287189cbfd7f986e92605adea2067630b75380c6b5733dab7d87f9a84e", size = 5403 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/5f/55bab0ac89f9ad9f4c6e38087faa80c252daec4ccb7776b4dac216ca9e3f/flake8_bandit-4.1.1-py3-none-any.whl", hash = "sha256:4c8a53eb48f23d4ef1e59293657181a3c989d0077c9952717e98a0eace43e06d", size = 4828 }, +] + +[[package]] +name = "flake8-bugbear" +version = "24.12.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "flake8" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c7/25/48ba712ff589b0149f21135234f9bb45c14d6689acc6151b5e2ff8ac2ae9/flake8_bugbear-24.12.12.tar.gz", hash = "sha256:46273cef0a6b6ff48ca2d69e472f41420a42a46e24b2a8972e4f0d6733d12a64", size = 82907 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/21/0a875f75fbe4008bd171e2fefa413536258fe6b4cfaaa087986de74588f4/flake8_bugbear-24.12.12-py3-none-any.whl", hash = "sha256:1b6967436f65ca22a42e5373aaa6f2d87966ade9aa38d4baf2a1be550767545e", size = 36664 }, +] + +[[package]] +name = "flake8-docstrings" +version = "1.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flake8" }, + { name = "pydocstyle" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/24/f839e3a06e18f4643ccb81370909a497297909f15106e6af2fecdef46894/flake8_docstrings-1.7.0.tar.gz", hash = "sha256:4c8cc748dc16e6869728699e5d0d685da9a10b0ea718e090b1ba088e67a941af", size = 5995 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/7d/76a278fa43250441ed9300c344f889c7fb1817080c8fb8996b840bf421c2/flake8_docstrings-1.7.0-py2.py3-none-any.whl", hash = "sha256:51f2344026da083fc084166a9353f5082b01f72901df422f74b4d953ae88ac75", size = 4994 }, +] + +[[package]] +name = "flake8-rst-docstrings" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flake8" }, + { name = "pygments" }, + { name = "restructuredtext-lint" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/e5/013d5858b69b4ba38ff259d55bd8d107009f212f296be0824b7c4a27d7ed/flake8-rst-docstrings-0.3.0.tar.gz", hash = "sha256:d1ce22b4bd37b73cd86b8d980e946ef198cfcc18ed82fedb674ceaa2f8d1afa4", size = 19865 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/bf/0e6933d78d172df672325622bf1b7f8364f4a6515da9e89398227c19d02e/flake8_rst_docstrings-0.3.0-py3-none-any.whl", hash = "sha256:f8c3c6892ff402292651c31983a38da082480ad3ba253743de52989bdc84ca1c", size = 10892 }, +] + 
[[package]] name = "flexcache" version = "0.3" @@ -1326,6 +1443,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 }, ] +[[package]] +name = "isort" +version = "5.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/87/f9/c1eb8635a24e87ade2efce21e3ce8cd6b8630bb685ddc9cdaca1349b2eb5/isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109", size = 175303 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/b3/8def84f539e7d2289a02f0524b944b15d7c75dab7628bedf1c4f0992029c/isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6", size = 92310 }, +] + [[package]] name = "jedi" version = "0.19.2" @@ -1569,6 +1695,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, ] +[[package]] +name = "mccabe" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/ff/0ffefdcac38932a54d2b5eed4e0ba8a408f215002cd178ad1df0f2806ff8/mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", size = 9658 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e", size = 7350 }, +] + [[package]] name = "mdit-py-plugins" version = "0.4.2" @@ -1770,14 +1905,23 @@ lossy = [ 
[package.dev-dependencies] dev = [ + { name = "black" }, { name = "coverage", extra = ["toml"] }, + { name = "darglint" }, + { name = "flake8" }, + { name = "flake8-bandit" }, + { name = "flake8-bugbear" }, + { name = "flake8-docstrings" }, + { name = "flake8-rst-docstrings" }, + { name = "isort" }, { name = "mypy" }, + { name = "pep8-naming" }, { name = "pre-commit" }, { name = "pre-commit-hooks" }, { name = "pygments" }, { name = "pytest" }, { name = "pytest-dependency" }, - { name = "ruff" }, + { name = "pyupgrade" }, { name = "safety" }, { name = "typeguard" }, { name = "xdoctest", extra = ["colors"] }, @@ -1804,9 +1948,12 @@ requires-dist = [ { name = "fsspec", specifier = ">=2024.10.0" }, { name = "gcsfs", marker = "extra == 'cloud'", specifier = ">=2024.10.0" }, { name = "pint", specifier = ">=0.24.3,<0.25" }, + { name = "pint", specifier = ">=0.24.3,<0.25" }, { name = "psutil", specifier = ">=6.1.0,<7.0.0" }, { name = "pydantic", specifier = ">=2.8.2,<3.0.0" }, { name = "pydantic-settings", specifier = ">=2.4.0,<3.0.0" }, + { name = "pydantic", specifier = ">=2.8.2,<3.0.0" }, + { name = "pydantic-settings", specifier = ">=2.4.0,<3.0.0" }, { name = "rich", specifier = ">=13.9.4,<14.0.0" }, { name = "s3fs", marker = "extra == 'cloud'", specifier = "==2024.12.0" }, { name = "segy", specifier = ">=0.4.0,<0.5.0" }, @@ -1819,14 +1966,23 @@ provides-extras = ["cloud", "distributed", "lossy"] [package.metadata.requires-dev] dev = [ + { name = "black", specifier = ">=24.10.0,<25" }, { name = "coverage", extras = ["toml"], specifier = ">=7.6.7,<8" }, + { name = "darglint", specifier = ">=1.8.1,<2" }, + { name = "flake8", specifier = ">=7.1.0,<8" }, + { name = "flake8-bandit", specifier = ">=4.1.1,<5" }, + { name = "flake8-bugbear", specifier = ">=24.4.26,<25" }, + { name = "flake8-docstrings", specifier = ">=1.7.0,<2" }, + { name = "flake8-rst-docstrings", specifier = ">=0.3.0,<0.4" }, + { name = "isort", specifier = ">=5.13.2,<6" }, { name = "mypy", specifier = 
">=1.13.0,<2" }, + { name = "pep8-naming", specifier = ">=0.14.1,<0.15" }, { name = "pre-commit", specifier = ">=4.0.1,<5" }, { name = "pre-commit-hooks", specifier = ">=5.0.0,<6" }, { name = "pygments", specifier = ">=2.18.0,<3" }, { name = "pytest", specifier = ">=8.3.3,<9" }, { name = "pytest-dependency", specifier = ">=0.6.0,<0.7" }, - { name = "ruff", specifier = ">=0.11.8" }, + { name = "pyupgrade", specifier = ">=3.19.0,<4" }, { name = "safety", specifier = ">=3.2.3,<4" }, { name = "typeguard", specifier = ">=4.4.1,<5" }, { name = "xdoctest", extras = ["colors"], specifier = ">=1.2.0,<2" }, @@ -1835,7 +1991,7 @@ docs = [ { name = "furo", specifier = ">=2024.8.6" }, { name = "linkify-it-py", specifier = ">=2.0.3" }, { name = "myst-nb", specifier = ">=1.2.0" }, - { name = "sphinx", specifier = ">=8.2.3,<9" }, + { name = "sphinx", specifier = ">=8.1.3,<9" }, { name = "sphinx-autobuild", specifier = ">=2024.10.3" }, { name = "sphinx-click", specifier = ">=6.0.0,<7" }, { name = "sphinx-copybutton", specifier = ">=0.5.2,<0.6" }, @@ -2180,6 +2336,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl", hash = "sha256:978e4ac767ec4ba5b86c6eaa52e5a2a3bc748a2ca839e8cc798f1cc6ce6efb0f", size = 18905 }, ] +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, +] + +[[package]] +name = "pbr" +version = "6.1.1" +source 
= { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/d2/510cc0d218e753ba62a1bc1434651db3cd797a9716a0a66cc714cb4f0935/pbr-6.1.1.tar.gz", hash = "sha256:93ea72ce6989eb2eed99d0f75721474f69ad88128afdef5ac377eb797c4bf76b", size = 125702 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/ac/684d71315abc7b1214d59304e23a982472967f6bf4bde5a98f1503f648dc/pbr-6.1.1-py2.py3-none-any.whl", hash = "sha256:38d4daea5d9fa63b3f626131b9d34947fd0c8be9b05a29276870580050a25a76", size = 108997 }, +] + +[[package]] +name = "pep8-naming" +version = "0.14.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flake8" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/be/8e/1de32e908d8b008bb9352bfe7749aedecb71e2793d36c7ee342716acd1ec/pep8-naming-0.14.1.tar.gz", hash = "sha256:1ef228ae80875557eb6c1549deafed4dabbf3261cfcafa12f773fe0db9be8a36", size = 16546 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/a2/450b71d1a87fcee50a7b994a53b1c68fc6a6b718df0eb035f2bffb2d3a4f/pep8_naming-0.14.1-py3-none-any.whl", hash = "sha256:63f514fc777d715f935faf185dedd679ab99526a7f2f503abb61587877f7b1c5", size = 8859 }, +] + [[package]] name = "pexpect" version = "4.9.0" @@ -2477,6 +2666,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259 }, ] +[[package]] +name = "pycodestyle" +version = "2.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/6e/1f4a62078e4d95d82367f24e685aef3a672abfd27d1a868068fed4ed2254/pycodestyle-2.13.0.tar.gz", hash = "sha256:c8415bf09abe81d9c7f872502a6eee881fbe85d8763dd5b9924bb0a01d67efae", size = 39312 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/07/be/b00116df1bfb3e0bb5b45e29d604799f7b91dd861637e4d448b4e09e6a3e/pycodestyle-2.13.0-py2.py3-none-any.whl", hash = "sha256:35863c5974a271c7a726ed228a14a4f6daf49df369d8c50cd9a6f58a5e143ba9", size = 31424 }, +] + [[package]] name = "pycparser" version = "2.22" @@ -2558,7 +2756,28 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/67/1d/42628a2c33e93f8e9acbde0d5d735fa0850f3e6a2f8cb1eb6c40b9a732ac/pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268", size = 163234 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/5f/d6d641b490fd3ec2c4c13b4244d68deea3a1b970a97be64f34fb5504ff72/pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef", size = 44356 }, + { url = "https://files.pythonhosted.org/packages/0b/53/a64f03044927dc47aafe029c42a5b7aabc38dfb813475e0e1bf71c4a59d0/pydantic_settings-2.8.1-py3-none-any.whl", hash = "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c", size = 30839 }, +] + +[[package]] +name = "pydocstyle" +version = "6.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "snowballstemmer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/5c/d5385ca59fd065e3c6a5fe19f9bc9d5ea7f2509fa8c9c22fb6b2031dd953/pydocstyle-6.3.0.tar.gz", hash = "sha256:7ce43f0c0ac87b07494eb9c0b462c0b73e6ff276807f204d6b53edc72b7e44e1", size = 36796 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/ea/99ddefac41971acad68f14114f38261c1f27dac0b3ec529824ebc739bdaa/pydocstyle-6.3.0-py3-none-any.whl", hash = "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019", size = 38038 }, +] + +[[package]] +name = "pyflakes" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/af/cc/1df338bd7ed1fa7c317081dcf29bf2f01266603b301e6858856d346a12b3/pyflakes-3.3.2.tar.gz", hash = "sha256:6dfd61d87b97fba5dcfaaf781171ac16be16453be6d816147989e7f6e6a9576b", size = 64175 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/40/b293a4fa769f3b02ab9e387c707c4cbdc34f073f945de0386107d4e669e6/pyflakes-3.3.2-py2.py3-none-any.whl", hash = "sha256:5039c8339cbb1944045f4ee5466908906180f13cc99cc9949348d10f82a5c32a", size = 63164 }, ] [[package]] @@ -2639,6 +2858,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, ] +[[package]] +name = "pyupgrade" +version = "3.19.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tokenize-rt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/35/3a/efa8e75cf84d53f1b3f0113387ab120ef460396a4068e41b6cf18a3d216d/pyupgrade-3.19.1.tar.gz", hash = "sha256:d10e8c5f54b8327211828769e98d95d95e4715de632a3414f1eef3f51357b9e2", size = 45116 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/43/c6c1ff945c7900613f6e6ef2a8688639a247d62eb0ffa9935c599f69c08e/pyupgrade-3.19.1-py2.py3-none-any.whl", hash = "sha256:8c5b0bfacae5ff30fa136a53eb7f22c34ba007450d4099e9da8089dabb9e67c9", size = 62412 }, +] + [[package]] name = "pywin32" version = "310" @@ -2901,6 +3132,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 }, ] +[[package]] +name = "restructuredtext-lint" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/48/9c/6d8035cafa2d2d314f34e6cd9313a299de095b26e96f1c7312878f988eec/restructuredtext_lint-1.4.0.tar.gz", hash = "sha256:1b235c0c922341ab6c530390892eb9e92f90b9b75046063e047cacfb0f050c45", size = 16723 } + [[package]] name = "rich" version = "13.9.4" @@ -3432,7 +3672,19 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846 } wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037 }, + { url = "https://files.pythonhosted.org/packages/a0/4b/528ccf7a982216885a1ff4908e886b8fb5f19862d1962f56a3fce2435a70/starlette-0.46.1-py3-none-any.whl", hash = "sha256:77c74ed9d2720138b25875133f3a2dae6d854af2ec37dceb56aef370c1d8a227", size = 71995 }, +] + +[[package]] +name = "stevedore" +version = "5.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pbr" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/3f/13cacea96900bbd31bb05c6b74135f85d15564fc583802be56976c940470/stevedore-5.4.1.tar.gz", hash = "sha256:3135b5ae50fe12816ef291baff420acb727fcd356106e3e9cbfa9e5985cd6f4b", size = 513858 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/45/8c4ebc0c460e6ec38e62ab245ad3c7fc10b210116cea7c16d61602aa9558/stevedore-5.4.1-py3-none-any.whl", hash = "sha256:d10a31c7b86cba16c1f6e8d15416955fc797052351a56af15e608ad20811fcfe", size = 49533 }, ] [[package]] @@ -3453,6 +3705,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/44/aa5c8b10b2cce7a053018e0d132bd58e27527a0243c4985383d5b6fd93e9/tblib-3.1.0-py3-none-any.whl", hash = 
"sha256:670bb4582578134b3d81a84afa1b016128b429f3d48e6cbbaecc9d15675e984e", size = 12552 }, ] +[[package]] +name = "tokenize-rt" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/0a/5854d8ced8c1e00193d1353d13db82d7f813f99bd5dcb776ce3e2a4c0d19/tokenize_rt-6.1.0.tar.gz", hash = "sha256:e8ee836616c0877ab7c7b54776d2fefcc3bde714449a206762425ae114b53c86", size = 5506 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/ba/576aac29b10dfa49a6ce650001d1bb31f81e734660555eaf144bfe5b8995/tokenize_rt-6.1.0-py2.py3-none-any.whl", hash = "sha256:d706141cdec4aa5f358945abe36b911b8cbdc844545da99e811250c0cee9b6fc", size = 6015 }, +] + [[package]] name = "tenacity" version = "9.1.2" From 4142ae31826e9951e3e9af7db0b9d04a311096f0 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 28 Apr 2025 16:21:26 +0000 Subject: [PATCH 02/55] Begin adding template factory --- pyproject.toml | 1 + src/mdio/schemas/v1/template_factory.py | 79 ++++++++ tests/unit/test_template_factory.py | 252 ++++++++++++++++++++++++ 3 files changed, 332 insertions(+) create mode 100644 src/mdio/schemas/v1/template_factory.py create mode 100644 tests/unit/test_template_factory.py diff --git a/pyproject.toml b/pyproject.toml index 7e01894e..2c500c4c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "tqdm (>=4.67.0,<5.0.0)", "xarray>=2025.3.1", "zarr (>=3.0.8,<4.0.0)", + "pint (>=0.24.3,<0.25)", ] [project.optional-dependencies] diff --git a/src/mdio/schemas/v1/template_factory.py b/src/mdio/schemas/v1/template_factory.py new file mode 100644 index 00000000..eb7713af --- /dev/null +++ b/src/mdio/schemas/v1/template_factory.py @@ -0,0 +1,79 @@ +"""Factory methods for MDIO v1 schema models.""" + +from datetime import datetime +from typing import Any, Optional, Union, List, Dict + +from pydantic import AwareDatetime + +from mdio.schema.dimension import NamedDimension +from 
mdio.schema.compressors import Blosc, ZFP +from mdio.schema.dtype import ScalarType, StructuredType +from mdio.schema.metadata import UserAttributes +from mdio.schema.v1.units import AllUnits +from mdio.schema.v1.dataset import Dataset, DatasetMetadata +from mdio.schema.v1.variable import Variable, Coordinate, VariableMetadata + + +def make_named_dimension(name: str, size: int) -> NamedDimension: + """Create a NamedDimension with the given name and size.""" + return NamedDimension(name=name, size=size) + + +def make_coordinate( + name: str, + dimensions: List[Union[NamedDimension, str]], + data_type: ScalarType, + metadata: Optional[List[Union[AllUnits, UserAttributes]]] = None, +) -> Coordinate: + """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" + return Coordinate( + name=name, + dimensions=dimensions, + data_type=data_type, + metadata=metadata, + ) + + +def make_variable( + name: str, + dimensions: List[Union[NamedDimension, str]], + data_type: Union[ScalarType, StructuredType], + compressor: Union[Blosc, ZFP, None], + coordinates: Optional[List[Union[Coordinate, str]]] = None, + metadata: Optional[VariableMetadata] = None, +) -> Variable: + """Create a Variable with the given name, dimensions, data_type, compressor, coordinates, and metadata.""" + return Variable( + name=name, + dimensions=dimensions, + data_type=data_type, + compressor=compressor, + coordinates=coordinates, + metadata=metadata, + ) + + +def make_dataset_metadata( + name: str, + api_version: str, + created_on: AwareDatetime, + attributes: Optional[Dict[str, Any]] = None, +) -> DatasetMetadata: + """Create a DatasetMetadata with name, api_version, created_on, and optional attributes.""" + return DatasetMetadata( + name=name, + api_version=api_version, + created_on=created_on, + attributes=attributes, + ) + + +def make_dataset( + variables: List[Variable], + metadata: DatasetMetadata, +) -> Dataset: + """Create a Dataset with the given variables and metadata.""" 
+ return Dataset( + variables=variables, + metadata=metadata, + ) \ No newline at end of file diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py new file mode 100644 index 00000000..25bd8278 --- /dev/null +++ b/tests/unit/test_template_factory.py @@ -0,0 +1,252 @@ +"""Unit tests for MDIO v1 template_factory.""" +import pytest +from datetime import datetime, timezone +from pydantic import AwareDatetime, ValidationError + +from mdio.schema.dimension import NamedDimension +from mdio.schema.compressors import Blosc, ZFP +from mdio.schema.dtype import ScalarType, StructuredType +from mdio.schema.v1.units import LengthUnitModel +from mdio.schema.v1.template_factory import ( + make_named_dimension, + make_coordinate, + make_variable, + make_dataset_metadata, + make_dataset, +) + + +def test_make_named_dimension(): + dim = make_named_dimension("time", 42) + assert isinstance(dim, NamedDimension) + assert dim.name == "time" + assert dim.size == 42 + + +def test_make_coordinate_minimal(): + dims = ["x"] + coord = make_coordinate(name="x", dimensions=dims, data_type=ScalarType.FLOAT32) + assert coord.name == "x" + assert coord.dimensions == dims + assert coord.data_type == ScalarType.FLOAT32 + assert coord.metadata is None + + +def test_make_variable_minimal(): + var = make_variable( + name="var", + dimensions=["x"], + data_type=ScalarType.FLOAT32, + compressor=None, + ) + assert var.name == "var" + assert var.dimensions == ["x"] + assert var.data_type == ScalarType.FLOAT32 + assert var.compressor is None + assert var.coordinates is None + assert var.metadata is None + + +def test_make_dataset_metadata_minimal(): + ts: AwareDatetime = datetime.now(timezone.utc) + meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) + assert meta.name == "ds" + assert meta.api_version == "1" + assert meta.created_on == ts + assert meta.attributes is None + + +def test_make_dataset_minimal(): + var = make_variable( + name="var", + 
dimensions=["x"], + data_type=ScalarType.FLOAT32, + compressor=None, + ) + ts: AwareDatetime = datetime.now(timezone.utc) + meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) + ds = make_dataset([var], meta) + assert ds.variables == [var] + assert ds.metadata == meta + +def test_make_toy_dataset(): + # Define core dimensions + inline = make_named_dimension("inline", 256) + crossline = make_named_dimension("crossline", 512) + depth = make_named_dimension("depth", 384) + + # Create dataset metadata + created = datetime.fromisoformat("2023-12-12T15:02:06.413469-06:00") + meta = make_dataset_metadata( + name="campos_3d", + api_version="1.0.0", + created_on=created, + attributes={ + "textHeader": [ + "C01 .......................... ", + "C02 .......................... ", + "C03 .......................... " + ], + "foo": "bar" + } + ) + + # Image variable + image = make_variable( + name="image", + dimensions=[inline, crossline, depth], + data_type=ScalarType.FLOAT32, + compressor=Blosc(algorithm="zstd"), + coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], + metadata={ + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, + "statsV1": { + "count": 100, + "sum": 1215.1, + "sumSquares": 125.12, + "min": 5.61, + "max": 10.84, + "histogram": {"binCenters": [1, 2], "counts": [10, 15]} + }, + "attributes": {"fizz": "buzz"} + } + ) + + # Velocity variable + velocity = make_variable( + name="velocity", + dimensions=[inline, crossline, depth], + data_type=ScalarType.FLOAT16, + compressor=None, + coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], + metadata={ + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, + "unitsV1": {"speed": "m/s"} + } + ) + + # Inline-optimized image variable + image_inline = make_variable( + name="image_inline", + dimensions=[inline, crossline, depth], + data_type=ScalarType.FLOAT32, + compressor=ZFP(mode="fixed_accuracy", tolerance=0.05), + 
coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], + metadata={ + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [4, 512, 512]}} + } + ) + + # Headers variable with structured dtype + headers_dtype = StructuredType(fields=[ + {"name": "cdp-x", "format": ScalarType.INT32}, + {"name": "cdp-y", "format": ScalarType.INT32}, + {"name": "elevation", "format": ScalarType.FLOAT16}, + {"name": "some_scalar", "format": ScalarType.FLOAT16} + ]) + image_headers = make_variable( + name="image_headers", + dimensions=[inline, crossline], + data_type=headers_dtype, + compressor=None, + coordinates=["inline", "crossline", "cdp-x", "cdp-y"], + metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128]}}} + ) + + # Standalone dimension arrays + inline_var = make_variable(name="inline", dimensions=[inline], data_type=ScalarType.UINT32, compressor=None) + crossline_var = make_variable(name="crossline", dimensions=[crossline], data_type=ScalarType.UINT32, compressor=None) + depth_var = make_variable( + name="depth", + dimensions=[depth], + data_type=ScalarType.UINT32, + compressor=None, + metadata={"unitsV1": {"length": "m"}} + ) + cdp_x = make_variable( + name="cdp-x", + dimensions=[inline, crossline], + data_type=ScalarType.FLOAT32, + compressor=None, + metadata={"unitsV1": {"length": "m"}} + ) + cdp_y = make_variable( + name="cdp-y", + dimensions=[inline, crossline], + data_type=ScalarType.FLOAT32, + compressor=None, + metadata={"unitsV1": {"length": "m"}} + ) + + # Compose full dataset + ds = make_dataset( + variables=[ + image, velocity, image_inline, image_headers, + inline_var, crossline_var, depth_var, cdp_x, cdp_y + ], + metadata=meta + ) + + # Verify basic structure + assert ds.metadata.name == "campos_3d" + assert len(ds.variables) == 9 + names = [v.name for v in ds.variables] + assert names == [ + "image", "velocity", "image_inline", "image_headers", + "inline", "crossline", "depth", "cdp-x", "cdp-y" + ] + +def 
test_named_dimension_invalid_size(): + with pytest.raises(ValidationError): + make_named_dimension("dim", 0) + with pytest.raises(ValidationError): + make_named_dimension("dim", -1) + +def test_make_coordinate_invalid_types(): + # dimensions must be a list of NamedDimension or str + with pytest.raises(ValidationError): + make_coordinate(name="coord", dimensions="notalist", data_type=ScalarType.FLOAT32) + # data_type must be a valid ScalarType + with pytest.raises(ValidationError): + make_coordinate(name="coord", dimensions=["x"], data_type="notatype") + +def test_make_variable_invalid_args(): + # compressor must be Blosc, ZFP or None + with pytest.raises(ValidationError): + make_variable( + name="var", + dimensions=["x"], + data_type=ScalarType.FLOAT32, + compressor="notacompressor" + ) + # metadata dict must match VariableMetadata schema + with pytest.raises(ValidationError): + make_variable( + name="var", + dimensions=["x"], + data_type=ScalarType.FLOAT32, + compressor=None, + metadata={"foo": "bar"} + ) + +def test_make_dataset_metadata_invalid_created_on(): + # created_on must be an aware datetime + with pytest.raises(ValidationError): + make_dataset_metadata(name="ds", api_version="1", created_on="not-a-date") + + +def test_make_dataset_invalid_variables_and_metadata_types(): + ts: AwareDatetime = datetime.now(timezone.utc) + meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) + var = make_variable( + name="var", + dimensions=["x"], + data_type=ScalarType.FLOAT32, + compressor=None, + ) + # variables must be a list of Variable objects + with pytest.raises(ValidationError): + make_dataset(variables="notalist", metadata=meta) + # metadata must be a DatasetMetadata instance + with pytest.raises(ValidationError): + make_dataset(variables=[var], metadata={"foo": "bar"}) \ No newline at end of file From d2284f2a9ae3d248260665f4d24fe89a11dbf47b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 28 Apr 2025 16:28:30 +0000 Subject: [PATCH 03/55] 
Resolve deprecated warning --- src/mdio/schemas/v1/units.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdio/schemas/v1/units.py b/src/mdio/schemas/v1/units.py index 1913ff2e..f96b7491 100644 --- a/src/mdio/schemas/v1/units.py +++ b/src/mdio/schemas/v1/units.py @@ -12,7 +12,7 @@ from mdio.schemas.units import create_unit_model ureg = UnitRegistry() -ureg.default_format = "~C" # compact, abbreviated (symbol). +ureg.formatter.default_format = "~C" # compact, abbreviated (symbol). class LengthUnitEnum(UnitEnum): From 3064117076f5b4e9071bbf65faf99ad412a08d02 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 28 Apr 2025 16:30:02 +0000 Subject: [PATCH 04/55] Update type hinting, default compressor to none. --- src/mdio/schemas/v1/template_factory.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mdio/schemas/v1/template_factory.py b/src/mdio/schemas/v1/template_factory.py index eb7713af..ef025839 100644 --- a/src/mdio/schemas/v1/template_factory.py +++ b/src/mdio/schemas/v1/template_factory.py @@ -21,9 +21,9 @@ def make_named_dimension(name: str, size: int) -> NamedDimension: def make_coordinate( name: str, - dimensions: List[Union[NamedDimension, str]], - data_type: ScalarType, - metadata: Optional[List[Union[AllUnits, UserAttributes]]] = None, + dimensions: List[NamedDimension | str], + data_type: ScalarType | StructuredType, + metadata: Optional[List[AllUnits | UserAttributes]] = None, ) -> Coordinate: """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" return Coordinate( @@ -36,10 +36,10 @@ def make_coordinate( def make_variable( name: str, - dimensions: List[Union[NamedDimension, str]], - data_type: Union[ScalarType, StructuredType], - compressor: Union[Blosc, ZFP, None], - coordinates: Optional[List[Union[Coordinate, str]]] = None, + dimensions: List[NamedDimension | str], + data_type: ScalarType | StructuredType, + compressor: Blosc | ZFP | None = None, + 
coordinates: Optional[List[Coordinate | str]] = None, metadata: Optional[VariableMetadata] = None, ) -> Variable: """Create a Variable with the given name, dimensions, data_type, compressor, coordinates, and metadata.""" From a733bdddd878aeae114893f9bda894bf1b611ec5 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 28 Apr 2025 16:34:00 +0000 Subject: [PATCH 05/55] Testing updates --- tests/unit/test_template_factory.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index 25bd8278..e6662d70 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -154,13 +154,14 @@ def test_make_toy_dataset(): ) # Standalone dimension arrays - inline_var = make_variable(name="inline", dimensions=[inline], data_type=ScalarType.UINT32, compressor=None) + # Tests that we don't need to pass a compressor. + inline_var = make_variable(name="inline", dimensions=[inline], data_type=ScalarType.UINT32) + # Tests that we can still pass it explicitly. 
crossline_var = make_variable(name="crossline", dimensions=[crossline], data_type=ScalarType.UINT32, compressor=None) depth_var = make_variable( name="depth", dimensions=[depth], data_type=ScalarType.UINT32, - compressor=None, metadata={"unitsV1": {"length": "m"}} ) cdp_x = make_variable( From abc41f24b3d6310489638995428cb9305d1dace2 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 1 May 2025 18:37:06 +0000 Subject: [PATCH 06/55] Begin work on schema construction with Xarray --- pyproject.toml | 1 + src/mdio/core/v1/__init__.py | 27 ++++ src/mdio/core/v1/_overloads.py | 27 ++++ src/mdio/core/v1/xarray_constructor.py | 91 ++++++++++++ tests/integration/test_xarray_constructor.py | 148 +++++++++++++++++++ uv.lock | 2 +- 6 files changed, 295 insertions(+), 1 deletion(-) create mode 100644 src/mdio/core/v1/__init__.py create mode 100644 src/mdio/core/v1/_overloads.py create mode 100644 src/mdio/core/v1/xarray_constructor.py create mode 100644 tests/integration/test_xarray_constructor.py diff --git a/pyproject.toml b/pyproject.toml index 2c500c4c..525c1735 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dependencies = [ "xarray>=2025.3.1", "zarr (>=3.0.8,<4.0.0)", "pint (>=0.24.3,<0.25)", + "xarray (>=2025.4.0)", ] [project.optional-dependencies] diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py new file mode 100644 index 00000000..e3f9e8f2 --- /dev/null +++ b/src/mdio/core/v1/__init__.py @@ -0,0 +1,27 @@ +# mdio/__init__.py + +import xarray as _xr +from ._overloads import open_mdio, to_mdio + +__all__ = [ + # explicit overrides / aliases + "open_mdio", + "to_mdio", + # everything else will be auto-populated by __dir__ / __getattr__ +] + +def __getattr__(name: str): + """ + Fallback: anything not defined in mdio/__init__.py + gets looked up on xarray. 
+ """ + if hasattr(_xr, name): + return getattr(_xr, name) + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + +def __dir__(): + """ + Make dir(mdio) list our overrides and then all public xarray names. + """ + xr_public = [n for n in dir(_xr) if not n.startswith("_")] + return sorted(__all__ + xr_public) diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py new file mode 100644 index 00000000..9f86b9bf --- /dev/null +++ b/src/mdio/core/v1/_overloads.py @@ -0,0 +1,27 @@ +import xarray as _xr +from xarray import Dataset as _Dataset, DataArray as _DataArray + +def open_mdio(store, *args, engine="zarr", consolidated=False, **kwargs): + """ + Our mdio version of xr.open_zarr. Prints a greeting, + then calls xr.open_dataset(..., engine="zarr"). + """ + print("👋 hello world from mdio.open_mdio!") + return _xr.open_dataset(store, *args, + engine=engine, + consolidated=consolidated, + **kwargs) + +def to_mdio(self, *args, **kwargs): + """ + Alias for .to_zarr, renamed to .to_mdio, + so you get a consistent mdio.* naming. + """ + print("👋 hello world from mdio.to_mdio!") + print(f"kwargs: {kwargs}") + return self.to_zarr(*args, **kwargs) + +# Monkey-patch Dataset and DataArray so that you can do: +# ds.to_mdio(...) and arr.to_mdio(...) 
+_Dataset.to_mdio = to_mdio +_DataArray.to_mdio = to_mdio diff --git a/src/mdio/core/v1/xarray_constructor.py b/src/mdio/core/v1/xarray_constructor.py new file mode 100644 index 00000000..8cb6c9d9 --- /dev/null +++ b/src/mdio/core/v1/xarray_constructor.py @@ -0,0 +1,91 @@ +"""Construct an Xarray Dataset from an MDIO v1 Dataset and write to Zarr.""" +import xarray as xr +import numpy as np +import dask.array as da +from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding +from typing import Any + +from mdio.schema.v1.dataset import Dataset as MDIODataset +from mdio.schema.dimension import NamedDimension +from mdio.schema.dtype import ScalarType, StructuredType + + +def construct_xarray_dataset(mdio_ds: MDIODataset) -> xr.Dataset: + """Build an empty xarray.Dataset with correct dimensions and dtypes.""" + # Collect dimension sizes + dims: dict[str, int] = {} + for var in mdio_ds.variables: + for d in var.dimensions: + if isinstance(d, NamedDimension): + dims[d.name] = d.size + + # Build data variables + data_vars: dict[str, xr.DataArray] = {} + for var in mdio_ds.variables: + dim_names = [d.name if isinstance(d, NamedDimension) else d for d in var.dimensions] + shape = tuple(dims[name] for name in dim_names) + dt = var.data_type + if isinstance(dt, ScalarType): + dtype = np.dtype(dt.value) + elif isinstance(dt, StructuredType): + dtype = np.dtype([(f.name, f.format.value) for f in dt.fields]) + else: + raise TypeError(f"Unsupported data_type: {dt}") + # arr = da.zeros(shape, dtype=dtype) + arr = np.zeros(shape, dtype=dtype) + data_array = xr.DataArray(arr, dims=dim_names) + # set default fill_value to zero instead of NaN + data_array.encoding['fill_value'] = 0.0 # TODO: This seems to be ignored by xarray + # attach variable metadata into DataArray attributes, excluding nulls and chunkGrid + if var.metadata is not None: + md = var.metadata.model_dump( + by_alias=True, + exclude_none=True, + exclude={"chunk_grid"}, + ) + data_array.attrs.update(md) + 
data_vars[var.name] = data_array + + ds = xr.Dataset(data_vars) + # Attach metadata as attrs + ds.attrs["apiVersion"] = mdio_ds.metadata.api_version + ds.attrs["createdOn"] = str(mdio_ds.metadata.created_on) + if mdio_ds.metadata.attributes: + ds.attrs["attributes"] = mdio_ds.metadata.attributes + return ds + + +def to_mdio_zarr(mdio_ds: MDIODataset, store: str, **kwargs: Any) -> xr.Dataset: + """Construct an xarray.Dataset and write it to a Zarr store. Returns the xarray.Dataset.""" + ds = construct_xarray_dataset(mdio_ds) + # Write to Zarr format v2 with consolidated metadata and all attributes + enc = V2ChunkKeyEncoding(separator="/").to_dict() + global_encodings = {} + + for var in mdio_ds.variables: + fill_value = 0 + if isinstance(var.data_type, StructuredType): + # Create a structured fill value that matches the dtype + # fill_value = np.zeros(1, dtype=[(f.name, f.format.value) for f in var.data_type.fields])[0] + # TODO: Re-enable this once xarray supports this PR https://github.com/zarr-developers/zarr-python/pull/3015 + continue + chunks = None + if var.metadata is not None and var.metadata.chunk_grid is not None: + chunks = var.metadata.chunk_grid.configuration.chunk_shape + global_encodings[var.name] = { + "chunks": chunks, + # TODO: Re-enable this once xarray supports this PR https://github.com/pydata/xarray/pull/10274 + # "chunk_key_encoding": enc, + "_FillValue": fill_value, + "dtype": var.data_type, + } + + ds.to_mdio(store, + mode="w", + zarr_format=2, + consolidated=True, + safe_chunks=False, # This ignores the Dask chunks + compute=False, # Ensures only the metadata is written + encoding=global_encodings, + **kwargs) + return ds \ No newline at end of file diff --git a/tests/integration/test_xarray_constructor.py b/tests/integration/test_xarray_constructor.py new file mode 100644 index 00000000..4d7e3220 --- /dev/null +++ b/tests/integration/test_xarray_constructor.py @@ -0,0 +1,148 @@ +"""Integration test for MDIO v1 Xarray Zarr constructor.""" 
+import pytest +import zarr +import numpy as np +from pathlib import Path +from datetime import datetime, timezone + +from mdio.schema.dtype import ScalarType, StructuredType +from mdio.schema.compressors import Blosc, ZFP +from mdio.schema.v1.template_factory import ( + make_named_dimension, + make_variable, + make_dataset_metadata, + make_dataset, +) +from mdio.core.v1.xarray_constructor import to_mdio_zarr + + +def build_toy_dataset(): + # core dimensions + inline = make_named_dimension("inline", 256) + crossline = make_named_dimension("crossline", 512) + depth = make_named_dimension("depth", 384) + + # Create dataset metadata + created = datetime.fromisoformat("2023-12-12T15:02:06.413469-06:00") + meta = make_dataset_metadata( + name="campos_3d", + api_version="1.0.0", + created_on=created, + attributes={ + "textHeader": [ + "C01 .......................... ", + "C02 .......................... ", + "C03 .......................... " + ], + "foo": "bar" + } + ) + + # Image variable + image = make_variable( + name="image", + dimensions=[inline, crossline, depth], + data_type=ScalarType.FLOAT32, + compressor=Blosc(algorithm="zstd"), + coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], + metadata={ + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, + "statsV1": { + "count": 100, + "sum": 1215.1, + "sumSquares": 125.12, + "min": 5.61, + "max": 10.84, + "histogram": {"binCenters": [1, 2], "counts": [10, 15]} + }, + "attributes": {"fizz": "buzz"} + } + ) + + # Velocity variable + velocity = make_variable( + name="velocity", + dimensions=[inline, crossline, depth], + data_type=ScalarType.FLOAT16, + compressor=None, + coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], + metadata={ + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, + "unitsV1": {"speed": "m/s"} + } + ) + + # Inline-optimized image variable + image_inline = make_variable( + name="image_inline", + 
dimensions=[inline, crossline, depth], + data_type=ScalarType.FLOAT32, + compressor=ZFP(mode="fixed_accuracy", tolerance=0.05), + coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], + metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [4, 512, 512]}}} + ) + + # Headers variable with structured dtype + headers_dtype = StructuredType(fields=[ + {"name": "cdp-x", "format": ScalarType.INT32}, + {"name": "cdp-y", "format": ScalarType.INT32}, + {"name": "elevation", "format": ScalarType.FLOAT16}, + {"name": "some_scalar", "format": ScalarType.FLOAT16} + ]) + image_headers = make_variable( + name="image_headers", + dimensions=[inline, crossline], + data_type=headers_dtype, + compressor=None, + coordinates=["inline", "crossline", "cdp-x", "cdp-y"], + metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128]}}} + ) + + # Standalone dimension variables + inline_var = make_variable(name="inline", dimensions=[inline], data_type=ScalarType.UINT32, compressor=None) + crossline_var = make_variable(name="crossline", dimensions=[crossline], data_type=ScalarType.UINT32, compressor=None) + depth_var = make_variable( + name="depth", + dimensions=[depth], + data_type=ScalarType.UINT32, + compressor=None, + metadata={"unitsV1": {"length": "m"}} + ) + cdp_x = make_variable( + name="cdp-x", + dimensions=[inline, crossline], + data_type=ScalarType.FLOAT32, + compressor=None, + metadata={"unitsV1": {"length": "m"}} + ) + cdp_y = make_variable( + name="cdp-y", + dimensions=[inline, crossline], + data_type=ScalarType.FLOAT32, + compressor=None, + metadata={"unitsV1": {"length": "m"}} + ) + + # Compose full dataset + return make_dataset( + [image, velocity, image_inline, image_headers, + inline_var, crossline_var, depth_var, cdp_x, cdp_y], + meta + ) + + +def test_to_mdio_zarr_writes_and_returns_xarray(tmp_path): + ds_in = build_toy_dataset() + # store_path = tmp_path / "toy.zarr" + store_path = "test.mdio1" + # write to Zarr and 
get back xarray.Dataset + ds_out = to_mdio_zarr(ds_in, str(store_path)) + # global attributes should be present on the returned Dataset + assert ds_out.attrs["apiVersion"] == ds_in.metadata.api_version + assert ds_out.attrs["createdOn"] == str(ds_in.metadata.created_on) + if ds_in.metadata.attributes: + assert ds_out.attrs["attributes"] == ds_in.metadata.attributes + # verify the DataArray exists with correct shape and dtype + arr = ds_out["image"] + assert arr.shape == (256, 512, 384) + assert arr.dtype == np.dtype("float32") diff --git a/uv.lock b/uv.lock index c2ddbc3c..04a86254 100644 --- a/uv.lock +++ b/uv.lock @@ -1958,7 +1958,7 @@ requires-dist = [ { name = "s3fs", marker = "extra == 'cloud'", specifier = "==2024.12.0" }, { name = "segy", specifier = ">=0.4.0,<0.5.0" }, { name = "tqdm", specifier = ">=4.67.0,<5.0.0" }, - { name = "xarray", specifier = ">=2025.3.1" }, + { name = "xarray", specifier = ">=2025.4.0" }, { name = "zarr", specifier = ">=3.0.8,<4.0.0" }, { name = "zfpy", marker = "extra == 'lossy'", specifier = ">=1.0.1,<2.0.0" }, ] From b4cfd367aa212f51a319172ff97b0d26c462d97a Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 2 May 2025 20:06:07 +0000 Subject: [PATCH 07/55] Add documentation --- src/mdio/core/v1/_overloads.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py index 9f86b9bf..7085073c 100644 --- a/src/mdio/core/v1/_overloads.py +++ b/src/mdio/core/v1/_overloads.py @@ -1,3 +1,12 @@ +""" +Overloads for xarray. + +The intent of overloading here is: +1. To provide a consistent mdio.* naming scheme. +2. To simplify the API for users where it makes sense (e.g. MDIO v1 uses Zarr and not HDF5). 
+""" + + import xarray as _xr from xarray import Dataset as _Dataset, DataArray as _DataArray From 764f90263b3d8ea8c31ed7c95ecb3a82405da024 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 2 May 2025 20:06:38 +0000 Subject: [PATCH 08/55] Fix serialization issues --- src/mdio/core/v1/xarray_constructor.py | 60 +++++++++++++ src/mdio/schemas/v1/template_factory.py | 110 +++++++++++++++++++++++- 2 files changed, 166 insertions(+), 4 deletions(-) diff --git a/src/mdio/core/v1/xarray_constructor.py b/src/mdio/core/v1/xarray_constructor.py index 8cb6c9d9..3dd4a898 100644 --- a/src/mdio/core/v1/xarray_constructor.py +++ b/src/mdio/core/v1/xarray_constructor.py @@ -8,6 +8,42 @@ from mdio.schema.v1.dataset import Dataset as MDIODataset from mdio.schema.dimension import NamedDimension from mdio.schema.dtype import ScalarType, StructuredType +from mdio.schema.compressors import Blosc, ZFP +from mdio.schema.v1.variable import Coordinate + + +from numcodecs import Blosc as NumcodecsBlosc + +try: + import zfpy as BaseZFPY # Baser library + from numcodecs import ZFPY as NumcodecsZFPY # Codec +except ImportError: + print(f"Tried to import zfpy and numcodes zfpy but failed because {ImportError}") + BaseZFPY = None + NumcodecsZFPY = None + +def convert_compressor(model: Blosc | ZFP | None) -> NumcodecsBlosc | NumcodecsZFPY | None: + if isinstance(model, Blosc): + return NumcodecsBlosc( + cname=model.algorithm.value, + clevel=model.level, + shuffle=model.shuffle.value, + blocksize=model.blocksize if model.blocksize > 0 else 0 + ) + elif isinstance(model, ZFP): + if BaseZFPY is None or NumcodecsZFPY is None: + raise ImportError("zfpy and numcodecs are required to use ZFP compression") + return NumcodecsZFPY( + mode=model.mode.value, + tolerance=model.tolerance, + rate=model.rate, + precision=model.precision, + write_header=model.write_header + ) + elif model is None: + return None + else: + raise TypeError(f"Unsupported compressor model: {type(model)}") def 
construct_xarray_dataset(mdio_ds: MDIODataset) -> xr.Dataset: @@ -36,6 +72,24 @@ def construct_xarray_dataset(mdio_ds: MDIODataset) -> xr.Dataset: data_array = xr.DataArray(arr, dims=dim_names) # set default fill_value to zero instead of NaN data_array.encoding['fill_value'] = 0.0 # TODO: This seems to be ignored by xarray + + # Set long_name if present + if var.long_name is not None: + data_array.attrs["long_name"] = var.long_name + + # Set coordinates if present, excluding dimension names + if var.coordinates is not None: + # Get the set of dimension names for this variable + dim_set = set(dim_names) + # Filter out any coordinates that are also dimensions + coord_names = [ + c.name if isinstance(c, Coordinate) else c + for c in var.coordinates + if (c.name if isinstance(c, Coordinate) else c) not in dim_set + ] + if coord_names: # Only set coordinates if there are any non-dimension coordinates + data_array.attrs["coordinates"] = " ".join(coord_names) + # attach variable metadata into DataArray attributes, excluding nulls and chunkGrid if var.metadata is not None: md = var.metadata.model_dump( @@ -43,6 +97,10 @@ def construct_xarray_dataset(mdio_ds: MDIODataset) -> xr.Dataset: exclude_none=True, exclude={"chunk_grid"}, ) + # Convert single-element lists to objects + for key, value in md.items(): + if isinstance(value, list) and len(value) == 1: + md[key] = value[0] data_array.attrs.update(md) data_vars[var.name] = data_array @@ -50,6 +108,7 @@ def construct_xarray_dataset(mdio_ds: MDIODataset) -> xr.Dataset: # Attach metadata as attrs ds.attrs["apiVersion"] = mdio_ds.metadata.api_version ds.attrs["createdOn"] = str(mdio_ds.metadata.created_on) + ds.attrs["name"] = mdio_ds.metadata.name if mdio_ds.metadata.attributes: ds.attrs["attributes"] = mdio_ds.metadata.attributes return ds @@ -78,6 +137,7 @@ def to_mdio_zarr(mdio_ds: MDIODataset, store: str, **kwargs: Any) -> xr.Dataset: # "chunk_key_encoding": enc, "_FillValue": fill_value, "dtype": var.data_type, + 
"compressors": convert_compressor(var.compressor), } ds.to_mdio(store, diff --git a/src/mdio/schemas/v1/template_factory.py b/src/mdio/schemas/v1/template_factory.py index ef025839..abcbcc17 100644 --- a/src/mdio/schemas/v1/template_factory.py +++ b/src/mdio/schemas/v1/template_factory.py @@ -1,6 +1,6 @@ """Factory methods for MDIO v1 schema models.""" -from datetime import datetime +from datetime import datetime, timezone from typing import Any, Optional, Union, List, Dict from pydantic import AwareDatetime @@ -23,11 +23,13 @@ def make_coordinate( name: str, dimensions: List[NamedDimension | str], data_type: ScalarType | StructuredType, + long_name: str = None, metadata: Optional[List[AllUnits | UserAttributes]] = None, ) -> Coordinate: """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" return Coordinate( name=name, + long_name=long_name, dimensions=dimensions, data_type=data_type, metadata=metadata, @@ -38,18 +40,60 @@ def make_variable( name: str, dimensions: List[NamedDimension | str], data_type: ScalarType | StructuredType, + long_name: str = None, compressor: Blosc | ZFP | None = None, coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[VariableMetadata] = None, + metadata: Optional[List[AllUnits | UserAttributes] | Dict[str, Any] | VariableMetadata] = None, ) -> Variable: """Create a Variable with the given name, dimensions, data_type, compressor, coordinates, and metadata.""" + # Convert metadata to VariableMetadata if needed + var_metadata = None + if metadata: + if isinstance(metadata, list): + # Convert list of metadata to dict + metadata_dict = {} + for md in metadata: + if isinstance(md, AllUnits): + # For units_v1, if it's a single element, use it directly + if isinstance(md.units_v1, list) and len(md.units_v1) == 1: + metadata_dict["units_v1"] = md.units_v1[0] + else: + metadata_dict["units_v1"] = md.units_v1 + elif isinstance(md, UserAttributes): + # For attributes, if it's a single element, 
use it directly + attrs = md.model_dump(by_alias=True) + if isinstance(attrs, list) and len(attrs) == 1: + metadata_dict["attributes"] = attrs[0] + else: + metadata_dict["attributes"] = attrs + var_metadata = VariableMetadata(**metadata_dict) + elif isinstance(metadata, dict): + # Convert camelCase keys to snake_case for VariableMetadata + converted_dict = {} + for key, value in metadata.items(): + if key == "unitsV1": + # For units_v1, if it's a single element array, use the element directly + if isinstance(value, list) and len(value) == 1: + converted_dict["units_v1"] = value[0] + else: + converted_dict["units_v1"] = value + else: + converted_dict[key] = value + var_metadata = VariableMetadata(**converted_dict) + elif isinstance(metadata, VariableMetadata): + var_metadata = metadata + else: + raise TypeError(f"Unsupported metadata type: {type(metadata)}") + + # Create the variable with all attributes explicitly set return Variable( name=name, + long_name=long_name, dimensions=dimensions, data_type=data_type, compressor=compressor, coordinates=coordinates, - metadata=metadata, + metadata=var_metadata, ) @@ -76,4 +120,62 @@ def make_dataset( return Dataset( variables=variables, metadata=metadata, - ) \ No newline at end of file + ) + + +class AbstractTemplateFactory: + + def __init__(self, name: str): + self.name = name + self.api_version = "1.0.0" # TODO: Pull from package metadata + self.created_on = datetime.now(timezone.utc) + + + def AddDimension(self, name: str, size: int) -> 'AbstractTemplateFactory': + """Add a dimension to the factory.""" + self.dimensions.append(make_named_dimension(name, size)) + return self + + + def AddCoordinate(self, + name: str = "", + dimensions: List[NamedDimension | str] = [], + data_type: ScalarType | StructuredType = ScalarType.FLOAT32, + metadata: Optional[List[AllUnits | UserAttributes]] = None) -> 'AbstractTemplateFactory': + """Add a coordinate to the factory.""" + if name == "": + name = f"coord_{len(self.coordinates)}" + 
if dimensions == []: + dimensions = self.dimensions + self.coordinates.append(make_coordinate(name, dimensions, data_type, metadata)) + return self + + def AddVariable(self, name: str = "", + dimensions: List[NamedDimension | str] = [], + data_type: ScalarType | StructuredType = ScalarType.FLOAT32, + compressor: Blosc | ZFP | None = None, + coordinates: Optional[List[Coordinate | str]] = None, + metadata: Optional[VariableMetadata] = None) -> 'AbstractTemplateFactory': + """Add a variable to the factory.""" + if name == "": + name = f"var_{len(self.variables)}" + if dimensions == []: + dimensions = self.dimensions + self.variables.append(make_variable(name, dimensions, data_type, compressor, coordinates, metadata)) + return self + + def _compose_metadata(self) -> DatasetMetadata: + """Compose the DatasetMetadata with the given name, api_version, and created_on.""" + return make_dataset_metadata(self.name, self.api_version, self.created_on) + + + def _compose_variables(self) -> List[Variable]: + """Compose the Variables with the given name, dimensions, data_type, compressor, coordinates, and metadata.""" + return [ + make_variable(self.name, self.dimensions, self.data_type, self.compressor, self.coordinates, self.metadata) + ] + + + def make_dataset(self, variables: List[Variable]) -> Dataset: + """Create a Dataset with the given variables and metadata.""" + return Dataset(variables=variables, metadata=self._compose_metadata()) From e757823d22e68a4d5ad9b7792298636456220724 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 2 May 2025 20:07:13 +0000 Subject: [PATCH 09/55] Begin implementing a builder pattern for Datasets --- src/mdio/schemas/v1/template_builder.py | 189 ++++++++++ tests/unit/schema/v1/test_template_builder.py | 331 ++++++++++++++++++ 2 files changed, 520 insertions(+) create mode 100644 src/mdio/schemas/v1/template_builder.py create mode 100644 tests/unit/schema/v1/test_template_builder.py diff --git a/src/mdio/schemas/v1/template_builder.py 
b/src/mdio/schemas/v1/template_builder.py new file mode 100644 index 00000000..85baf95e --- /dev/null +++ b/src/mdio/schemas/v1/template_builder.py @@ -0,0 +1,189 @@ +"""Builder pattern implementation for MDIO v1 schema models.""" + +from datetime import datetime, timezone +from typing import Any, Optional, List, Dict, Union +from enum import Enum, auto + +from pydantic import AwareDatetime + +from mdio.schema.dimension import NamedDimension +from mdio.schema.compressors import Blosc, ZFP +from mdio.schema.dtype import ScalarType, StructuredType +from mdio.schema.metadata import UserAttributes +from mdio.schema.v1.units import AllUnits +from mdio.schema.v1.dataset import Dataset, DatasetMetadata +from mdio.schema.v1.variable import Variable, Coordinate, VariableMetadata +from mdio.schema.v1.template_factory import ( + make_named_dimension, + make_coordinate, + make_variable, + make_dataset_metadata, + make_dataset, +) + + +class _BuilderState(Enum): + """States for the template builder.""" + INITIAL = auto() + HAS_DIMENSIONS = auto() + HAS_COORDINATES = auto() + HAS_VARIABLES = auto() + +class TemplateBuilder: + """Builder for creating MDIO datasets with enforced build order: + 1. Must add dimensions first via add_dimension() + 2. Can optionally add coordinates via add_coordinate() + 3. Must add variables via add_variable() + 4. 
Must call build() to create the dataset + """ + def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None): + self.name = name + self.api_version = "1.0.0" # TODO: Pull from package metadata + self.created_on = datetime.now(timezone.utc) + self.attributes = attributes + self._dimensions: List[NamedDimension] = [] + self._coordinates: List[Coordinate] = [] + self._variables: List[Variable] = [] + self._state = _BuilderState.INITIAL + self._unnamed_variable_counter = 0 + + def add_dimension(self, + name: str, + size: int, + long_name: str = None, + data_type: ScalarType | StructuredType = ScalarType.INT32, + metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None) -> 'TemplateBuilder': + """Add a dimension. This must be called at least once before adding coordinates or variables. + + Args: + name: Name of the dimension + size: Size of the dimension + long_name: Optional long name for the dimension variable + data_type: Data type for the dimension variable (defaults to INT32) + metadata: Optional metadata for the dimension variable + """ + # Create the dimension + dimension = make_named_dimension(name, size) + self._dimensions.append(dimension) + + # Create a variable for the dimension + dim_var = make_variable( + name=name, + long_name=long_name, + dimensions=[dimension], + data_type=data_type, + metadata=metadata + ) + self._variables.append(dim_var) + + self._state = _BuilderState.HAS_DIMENSIONS + return self + + def add_coordinate(self, + name: str = "", + *, + long_name: str = None, + dimensions: List[NamedDimension | str] = [], + data_type: ScalarType | StructuredType = ScalarType.FLOAT32, + metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None) -> 'TemplateBuilder': + """Add a coordinate after adding at least one dimension.""" + if self._state == _BuilderState.INITIAL: + raise ValueError("Must add at least one dimension before adding coordinates") + + if name == "": + name = 
f"coord_{len(self._coordinates)}" + if dimensions == []: + dimensions = self._dimensions + if isinstance(metadata, dict): + metadata = [metadata] + + # Convert string dimension names to NamedDimension objects + dim_objects = [] + for dim in dimensions: + if isinstance(dim, str): + dim_obj = next((d for d in self._dimensions if d.name == dim), None) + if dim_obj is None: + raise ValueError(f"Dimension '{dim}' not found") + dim_objects.append(dim_obj) + else: + dim_objects.append(dim) + + self._coordinates.append(make_coordinate( + name=name, + long_name=long_name, + dimensions=dim_objects, + data_type=data_type, + metadata=metadata + )) + self._state = _BuilderState.HAS_COORDINATES + return self + + def add_variable(self, + name: str = "", + *, + long_name: str = None, + dimensions: List[NamedDimension | str] = [], + data_type: ScalarType | StructuredType = ScalarType.FLOAT32, + compressor: Blosc | ZFP | None = None, + coordinates: Optional[List[Coordinate | str]] = None, + metadata: Optional[VariableMetadata] = None) -> 'TemplateBuilder': + """Add a variable after adding at least one dimension.""" + if self._state == _BuilderState.INITIAL: + raise ValueError("Must add at least one dimension before adding variables") + + if name == "": + name = f"var_{self._unnamed_variable_counter}" + self._unnamed_variable_counter += 1 + if dimensions == []: + dimensions = self._dimensions + + # Convert string dimension names to NamedDimension objects + dim_objects = [] + for dim in dimensions: + if isinstance(dim, str): + dim_obj = next((d for d in self._dimensions if d.name == dim), None) + if dim_obj is None: + raise ValueError(f"Dimension '{dim}' not found") + dim_objects.append(dim_obj) + else: + dim_objects.append(dim) + + self._variables.append(make_variable( + name=name, + long_name=long_name, + dimensions=dim_objects, + data_type=data_type, + compressor=compressor, + coordinates=coordinates, + metadata=metadata + )) + self._state = _BuilderState.HAS_VARIABLES + return 
self + + def build(self) -> Dataset: + """Build the final dataset.""" + if self._state == _BuilderState.INITIAL: + raise ValueError("Must add at least one dimension before building") + + metadata = make_dataset_metadata( + self.name, + self.api_version, + self.created_on, + self.attributes + ) + + # Add coordinates as variables to the dataset + # We make a copy so that coordinates are not duplicated if the builder is reused + all_variables = self._variables.copy() + for coord in self._coordinates: + # Convert coordinate to variable + coord_var = make_variable( + name=coord.name, + long_name=coord.long_name, + dimensions=coord.dimensions, + data_type=coord.data_type, + metadata=coord.metadata + ) + all_variables.append(coord_var) + + return make_dataset(all_variables, metadata) \ No newline at end of file diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py new file mode 100644 index 00000000..9c23ec32 --- /dev/null +++ b/tests/unit/schema/v1/test_template_builder.py @@ -0,0 +1,331 @@ +"""Unit tests for the MDIO v1 template builder.""" + +import pytest +from datetime import datetime + +from mdio.schema.dimension import NamedDimension +from mdio.schema.dtype import ScalarType +from mdio.schema.v1.template_builder import ( + TemplateBuilder, + _BuilderState +) +from mdio.schema.v1.dataset import Dataset +from mdio.schema.v1.variable import Variable, Coordinate +from mdio.schema.compressors import Blosc, ZFP +from mdio.schema.dtype import StructuredType +from mdio.core.v1.xarray_constructor import to_mdio_zarr + + +def test_builder_initialization(): + """Test basic builder initialization.""" + builder = TemplateBuilder("test_dataset") + assert builder.name == "test_dataset" + assert builder.api_version == "1.0.0" + assert isinstance(builder.created_on, datetime) + assert len(builder._dimensions) == 0 + assert len(builder._coordinates) == 0 + assert len(builder._variables) == 0 + assert builder._state == 
_BuilderState.INITIAL + + +def test_dimension_builder_state(): + """Test dimension builder state transitions and functionality.""" + builder = TemplateBuilder("test_dataset") + + # First dimension should change state to HAS_DIMENSIONS and create a variable + builder = builder.add_dimension("x", 100, long_name="X Dimension") + assert builder._state == _BuilderState.HAS_DIMENSIONS + assert len(builder._dimensions) == 1 + assert len(builder._variables) == 1 + assert builder._dimensions[0].name == "x" + assert builder._dimensions[0].size == 100 + assert builder._variables[0].name == "x" + assert builder._variables[0].long_name == "X Dimension" + assert builder._variables[0].data_type == ScalarType.INT32 + assert builder._variables[0].dimensions[0].name == "x" + + # Adding another dimension should maintain state and create another variable + builder = builder.add_dimension("y", 200, data_type=ScalarType.UINT32) + assert builder._state == _BuilderState.HAS_DIMENSIONS + assert len(builder._dimensions) == 2 + assert len(builder._variables) == 2 + assert builder._dimensions[1].name == "y" + assert builder._dimensions[1].size == 200 + assert builder._variables[1].name == "y" + assert builder._variables[1].data_type == ScalarType.UINT32 + assert builder._variables[1].dimensions[0].name == "y" + + +def test_dimension_with_metadata(): + """Test adding dimensions with custom metadata.""" + builder = TemplateBuilder("test_dataset") + + # Add dimension with custom metadata + builder = builder.add_dimension( + "depth", + size=100, + data_type=ScalarType.FLOAT32, + metadata={"unitsV1": {"length": "m"}} + ) + + assert len(builder._variables) == 1 + depth_var = builder._variables[0] + assert depth_var.name == "depth" + assert depth_var.data_type == ScalarType.FLOAT32 + assert depth_var.metadata.units_v1.length == "m" + + +def test_coordinate_builder_state(): + """Test coordinate builder state transitions and functionality.""" + builder = TemplateBuilder("test_dataset") + + # Should 
not be able to add coordinates before dimensions + with pytest.raises(ValueError, match="Must add at least one dimension before adding coordinates"): + builder.add_coordinate("x_coord", dimensions=["x"]) + + # Add dimensions first + builder = builder.add_dimension("x", 100) + builder = builder.add_dimension("y", 200) + + # Adding coordinate should change state to HAS_COORDINATES + builder = builder.add_coordinate("x_coord", dimensions=["x"], long_name="X Coordinate") + assert builder._state == _BuilderState.HAS_COORDINATES + assert len(builder._coordinates) == 1 + assert builder._coordinates[0].name == "x_coord" + assert builder._coordinates[0].long_name == "X Coordinate" + assert builder._coordinates[0].dimensions[0].name == "x" + + # Adding another coordinate should maintain state + builder = builder.add_coordinate("y_coord", dimensions=["y"]) + assert builder._state == _BuilderState.HAS_COORDINATES + assert len(builder._coordinates) == 2 + assert builder._coordinates[1].name == "y_coord" + assert builder._coordinates[1].dimensions[0].name == "y" + + +def test_variable_builder_state(): + """Test variable builder state transitions and functionality.""" + builder = TemplateBuilder("test_dataset") + + # Should not be able to add variables before dimensions + with pytest.raises(ValueError, match="Must add at least one dimension before adding variables"): + builder.add_variable("data", dimensions=["x"]) + + # Add dimension first + builder = builder.add_dimension("x", 100) + + # Adding variable should change state to HAS_VARIABLES + builder = builder.add_variable("data", dimensions=["x"], long_name="Data Variable") + assert builder._state == _BuilderState.HAS_VARIABLES + assert len(builder._variables) == 2 # One for dimension, one for variable + assert builder._variables[1].name == "data" + assert builder._variables[1].long_name == "Data Variable" + assert builder._variables[1].dimensions[0].name == "x" + + # Adding another variable should maintain state + builder = 
builder.add_variable("data2", dimensions=["x"]) + assert builder._state == _BuilderState.HAS_VARIABLES + assert len(builder._variables) == 3 # One for dimension, two for variables + assert builder._variables[2].name == "data2" + assert builder._variables[2].dimensions[0].name == "x" + + +def test_build_dataset(): + """Test building a complete dataset.""" + dataset = (TemplateBuilder("test_dataset") + .add_dimension("x", 100) + .add_dimension("y", 200) + .add_coordinate("x_coord", dimensions=["x"]) + .add_coordinate("y_coord", dimensions=["y"]) + .add_variable("data", dimensions=["x", "y"], long_name="Test Data") + .build()) + + assert isinstance(dataset, Dataset) + assert dataset.metadata.name == "test_dataset" + # Two dimension variables + one data variable + two coordinate variables + assert len(dataset.variables) == 5 + assert dataset.variables[0].name == "x" + assert dataset.variables[1].name == "y" + assert dataset.variables[2].name == "data" + assert dataset.variables[2].long_name == "Test Data" + assert len(dataset.variables[2].dimensions) == 2 + + +def test_auto_naming(): + """Test automatic naming of coordinates and variables.""" + dataset = (TemplateBuilder("test_dataset") + .add_dimension("x", 100) + .add_coordinate() # Should be named "coord_0" + .add_coordinate() # Should be named "coord_1" + .add_variable() # Should be named "var_0" + .add_variable() # Should be named "var_1" + .build()) + + assert dataset.variables[0].name == "x" # Dimension variable + assert dataset.variables[1].name == "var_0" + assert dataset.variables[2].name == "var_1" + + +def test_default_dimensions(): + """Test that coordinates and variables use all dimensions by default.""" + dataset = (TemplateBuilder("test_dataset") + .add_dimension("x", 100) + .add_dimension("y", 200) + .add_coordinate() # Should use both x and y dimensions + .add_variable() # Should use both x and y dimensions + .build()) + + # Two dimension variables + one data variable + one coordinate variable + 
assert len(dataset.variables) == 4 + assert dataset.variables[2].name == "var_0" + assert len(dataset.variables[2].dimensions) == 2 + assert dataset.variables[2].dimensions[0].name == "x" + assert dataset.variables[2].dimensions[1].name == "y" + + +def test_build_order_enforcement(): + """Test that the builder enforces the correct build order.""" + builder = TemplateBuilder("test_dataset") + + # Should not be able to add coordinates before dimensions + with pytest.raises(ValueError, match="Must add at least one dimension before adding coordinates"): + builder.add_coordinate("x_coord", dimensions=["x"]) + + # Should not be able to add variables before dimensions + with pytest.raises(ValueError, match="Must add at least one dimension before adding variables"): + builder.add_variable("data", dimensions=["x"]) + + # Should not be able to build without dimensions + with pytest.raises(ValueError, match="Must add at least one dimension before building"): + builder.build() + + +def test_toy_example(): + """Test building a toy dataset with multiple variables and attributes.""" + dataset = (TemplateBuilder("campos_3d", attributes={ + "textHeader": [ + "C01 .......................... ", + "C02 .......................... ", + "C03 .......................... 
" + ], + "foo": "bar" + }) + # Add dimensions + .add_dimension("inline", 256, data_type=ScalarType.UINT32) + .add_dimension("crossline", 512, data_type=ScalarType.UINT32) + .add_dimension("depth", 384, data_type=ScalarType.UINT32, metadata={"unitsV1": {"length": "m"}}) + + # Add coordinates + .add_coordinate("cdp-x", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}}) + .add_coordinate("cdp-y", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}}) + + # Add image variable + .add_variable( + name="image", + dimensions=["inline", "crossline", "depth"], + data_type=ScalarType.FLOAT32, + compressor=Blosc(algorithm="zstd"), + coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], + metadata={ + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, + "statsV1": { + "count": 100, + "sum": 1215.1, + "sumSquares": 125.12, + "min": 5.61, + "max": 10.84, + "histogram": {"binCenters": [1, 2], "counts": [10, 15]} + }, + "attributes": {"fizz": "buzz"} + } + ) + + # Add velocity variable + .add_variable( + name="velocity", + dimensions=["inline", "crossline", "depth"], + data_type=ScalarType.FLOAT16, + coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], + metadata={ + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, + "unitsV1": {"speed": "m/s"} + } + ) + + # Add inline-optimized image variable + .add_variable( + name="image_inline", + long_name="inline optimized version of 3d_stack", + dimensions=["inline", "crossline", "depth"], + data_type=ScalarType.FLOAT32, + compressor=Blosc(algorithm="zstd"), + coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], + metadata={ + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [4, 512, 512]}} + } + ) + + # Add headers variable with structured dtype + .add_variable( + name="image_headers", + dimensions=["inline", "crossline"], + data_type=StructuredType(fields=[ + {"name": "cdp-x", 
"format": ScalarType.INT32}, + {"name": "cdp-y", "format": ScalarType.INT32}, + {"name": "elevation", "format": ScalarType.FLOAT16}, + {"name": "some_scalar", "format": ScalarType.FLOAT16} + ]), + coordinates=["inline", "crossline", "cdp-x", "cdp-y"], + # metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128]}}} + ) + .build()) + + # print(dataset.model_dump_json(indent=2)) + + path = "test.mdio1" + to_mdio_zarr(dataset, path) + + # Verify dataset structure + assert dataset.metadata.name == "campos_3d" + assert dataset.metadata.api_version == "1.0.0" + assert dataset.metadata.attributes["foo"] == "bar" + assert len(dataset.metadata.attributes["textHeader"]) == 3 + + # Verify variables (including dimension variables) + # 3 dimension variables + 4 data variables + 2 coordinate variables + assert len(dataset.variables) == 9 + + # Verify dimension variables + inline_var = next(v for v in dataset.variables if v.name == "inline") + assert inline_var.data_type == ScalarType.UINT32 + assert len(inline_var.dimensions) == 1 + assert inline_var.dimensions[0].name == "inline" + + depth_var = next(v for v in dataset.variables if v.name == "depth") + assert depth_var.data_type == ScalarType.UINT32 + assert depth_var.metadata.units_v1.length == "m" + + # Verify image variable + image = next(v for v in dataset.variables if v.name == "image") + assert image.data_type == ScalarType.FLOAT32 + assert isinstance(image.compressor, Blosc) + assert image.compressor.algorithm == "zstd" + assert image.metadata.stats_v1.count == 100 + + # Verify velocity variable + velocity = next(v for v in dataset.variables if v.name == "velocity") + assert velocity.data_type == ScalarType.FLOAT16 + assert velocity.compressor is None + assert velocity.metadata.units_v1.speed == "m/s" + + # Verify image_inline variable + image_inline = next(v for v in dataset.variables if v.name == "image_inline") + assert image_inline.long_name == "inline optimized version of 3d_stack" + 
assert isinstance(image_inline.compressor, Blosc) + assert image_inline.compressor.algorithm == "zstd" + + # Verify image_headers variable + headers = next(v for v in dataset.variables if v.name == "image_headers") + assert isinstance(headers.data_type, StructuredType) + assert len(headers.data_type.fields) == 4 + assert headers.data_type.fields[0].name == "cdp-x" From 0ea7d1b6344c8e44cfd3be6764dedf9af20db860 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 5 May 2025 13:38:44 +0000 Subject: [PATCH 10/55] Fix zfp in test --- noxfile.py | 1 + src/mdio/core/v1/xarray_constructor.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 281ec525..e9bb6408 100644 --- a/noxfile.py +++ b/noxfile.py @@ -200,6 +200,7 @@ def tests(session: Session) -> None: "pygments", "pytest-dependency", "s3fs", + "zfpy" # TODO(BrianMichell): Ensure this is pulling from the pyproject.toml ], ) diff --git a/src/mdio/core/v1/xarray_constructor.py b/src/mdio/core/v1/xarray_constructor.py index 3dd4a898..9c9015ea 100644 --- a/src/mdio/core/v1/xarray_constructor.py +++ b/src/mdio/core/v1/xarray_constructor.py @@ -38,7 +38,6 @@ def convert_compressor(model: Blosc | ZFP | None) -> NumcodecsBlosc | NumcodecsZ tolerance=model.tolerance, rate=model.rate, precision=model.precision, - write_header=model.write_header ) elif model is None: return None From 8ead67ba69f1f37c75cf9c661ab22ae0c63ec1bf Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 5 May 2025 13:54:55 +0000 Subject: [PATCH 11/55] Begin cleanup and removal of direct xarray references --- src/mdio/core/v1/xarray_constructor.py | 83 ++++++++++++------- tests/integration/test_xarray_constructor.py | 4 +- tests/unit/schema/v1/test_template_builder.py | 4 +- 3 files changed, 58 insertions(+), 33 deletions(-) diff --git a/src/mdio/core/v1/xarray_constructor.py b/src/mdio/core/v1/xarray_constructor.py index 9c9015ea..a1c63cdd 100644 --- a/src/mdio/core/v1/xarray_constructor.py +++ 
b/src/mdio/core/v1/xarray_constructor.py @@ -22,7 +22,7 @@ BaseZFPY = None NumcodecsZFPY = None -def convert_compressor(model: Blosc | ZFP | None) -> NumcodecsBlosc | NumcodecsZFPY | None: +def _convert_compressor(model: Blosc | ZFP | None) -> NumcodecsBlosc | NumcodecsZFPY | None: if isinstance(model, Blosc): return NumcodecsBlosc( cname=model.algorithm.value, @@ -45,8 +45,18 @@ def convert_compressor(model: Blosc | ZFP | None) -> NumcodecsBlosc | NumcodecsZ raise TypeError(f"Unsupported compressor model: {type(model)}") -def construct_xarray_dataset(mdio_ds: MDIODataset) -> xr.Dataset: - """Build an empty xarray.Dataset with correct dimensions and dtypes.""" +def _construct_mdio_dataset(mdio_ds: MDIODataset) -> xr.Dataset: + """Build an MDIO dataset with correct dimensions and dtypes. + + This internal function constructs the underlying data structure for an MDIO dataset, + handling dimension mapping, data types, and metadata organization. + + Args: + mdio_ds: The source MDIO dataset to construct from. + + Returns: + The constructed dataset with proper MDIO structure and metadata. + """ # Collect dimension sizes dims: dict[str, int] = {} for var in mdio_ds.variables: @@ -70,7 +80,9 @@ def construct_xarray_dataset(mdio_ds: MDIODataset) -> xr.Dataset: arr = np.zeros(shape, dtype=dtype) data_array = xr.DataArray(arr, dims=dim_names) # set default fill_value to zero instead of NaN - data_array.encoding['fill_value'] = 0.0 # TODO: This seems to be ignored by xarray + # TODO: This seems to be ignored by xarray. + # Setting in the _generate_encodings() function does work though. + data_array.encoding['fill_value'] = 0.0 # Set long_name if present if var.long_name is not None: @@ -113,31 +125,44 @@ def construct_xarray_dataset(mdio_ds: MDIODataset) -> xr.Dataset: return ds -def to_mdio_zarr(mdio_ds: MDIODataset, store: str, **kwargs: Any) -> xr.Dataset: - """Construct an xarray.Dataset and write it to a Zarr store. 
Returns the xarray.Dataset.""" - ds = construct_xarray_dataset(mdio_ds) - # Write to Zarr format v2 with consolidated metadata and all attributes - enc = V2ChunkKeyEncoding(separator="/").to_dict() - global_encodings = {} - for var in mdio_ds.variables: - fill_value = 0 - if isinstance(var.data_type, StructuredType): - # Create a structured fill value that matches the dtype - # fill_value = np.zeros(1, dtype=[(f.name, f.format.value) for f in var.data_type.fields])[0] - # TODO: Re-enable this once xarray supports this PR https://github.com/zarr-developers/zarr-python/pull/3015 - continue - chunks = None - if var.metadata is not None and var.metadata.chunk_grid is not None: - chunks = var.metadata.chunk_grid.configuration.chunk_shape - global_encodings[var.name] = { - "chunks": chunks, - # TODO: Re-enable this once xarray supports this PR https://github.com/pydata/xarray/pull/10274 - # "chunk_key_encoding": enc, - "_FillValue": fill_value, - "dtype": var.data_type, - "compressors": convert_compressor(var.compressor), - } + +def Write_MDIO_metadata(mdio_ds: MDIODataset, store: str, **kwargs: Any) -> xr.Dataset: + """Write MDIO metadata to a Zarr store and return the constructed xarray.Dataset. + + This function constructs an xarray.Dataset from the MDIO dataset and writes its metadata + to a Zarr store. The actual data is not written, only the metadata structure is created. + """ + ds = _construct_mdio_dataset(mdio_ds) + # Write to Zarr format v2 with consolidated metadata and all attributes + + def _generate_encodings() -> dict: + """Generate encodings for each variable in the MDIO dataset. + + Returns: + Dictionary mapping variable names to their encoding configurations. 
+ """ + dimension_separator_encoding = V2ChunkKeyEncoding(separator="/").to_dict() + global_encodings = {} + for var in mdio_ds.variables: + fill_value = 0 + if isinstance(var.data_type, StructuredType): + # Create a structured fill value that matches the dtype + # fill_value = np.zeros(1, dtype=[(f.name, f.format.value) for f in var.data_type.fields])[0] + # TODO: Re-enable this once xarray supports this PR https://github.com/zarr-developers/zarr-python/pull/3015 + continue + chunks = None + if var.metadata is not None and var.metadata.chunk_grid is not None: + chunks = var.metadata.chunk_grid.configuration.chunk_shape + global_encodings[var.name] = { + "chunks": chunks, + # TODO: Re-enable this once xarray supports this PR https://github.com/pydata/xarray/pull/10274 + # "chunk_key_encoding": dimension_separator_encoding, + "_FillValue": fill_value, + "dtype": var.data_type, + "compressors": _convert_compressor(var.compressor), + } + return global_encodings ds.to_mdio(store, mode="w", @@ -145,6 +170,6 @@ def to_mdio_zarr(mdio_ds: MDIODataset, store: str, **kwargs: Any) -> xr.Dataset: consolidated=True, safe_chunks=False, # This ignores the Dask chunks compute=False, # Ensures only the metadata is written - encoding=global_encodings, + encoding=_generate_encodings(), **kwargs) return ds \ No newline at end of file diff --git a/tests/integration/test_xarray_constructor.py b/tests/integration/test_xarray_constructor.py index 4d7e3220..46585ab3 100644 --- a/tests/integration/test_xarray_constructor.py +++ b/tests/integration/test_xarray_constructor.py @@ -13,7 +13,7 @@ make_dataset_metadata, make_dataset, ) -from mdio.core.v1.xarray_constructor import to_mdio_zarr +from mdio.core.v1.xarray_constructor import Write_MDIO_metadata def build_toy_dataset(): @@ -136,7 +136,7 @@ def test_to_mdio_zarr_writes_and_returns_xarray(tmp_path): # store_path = tmp_path / "toy.zarr" store_path = "test.mdio1" # write to Zarr and get back xarray.Dataset - ds_out = to_mdio_zarr(ds_in, 
str(store_path)) + ds_out = Write_MDIO_metadata(ds_in, str(store_path)) # global attributes should be present on the returned Dataset assert ds_out.attrs["apiVersion"] == ds_in.metadata.api_version assert ds_out.attrs["createdOn"] == str(ds_in.metadata.created_on) diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index 9c23ec32..e17783af 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -13,7 +13,7 @@ from mdio.schema.v1.variable import Variable, Coordinate from mdio.schema.compressors import Blosc, ZFP from mdio.schema.dtype import StructuredType -from mdio.core.v1.xarray_constructor import to_mdio_zarr +from mdio.core.v1.xarray_constructor import Write_MDIO_metadata def test_builder_initialization(): @@ -283,7 +283,7 @@ def test_toy_example(): # print(dataset.model_dump_json(indent=2)) path = "test.mdio1" - to_mdio_zarr(dataset, path) + Write_MDIO_metadata(dataset, path) # Verify dataset structure assert dataset.metadata.name == "campos_3d" From 35895d56d59927d73eeedc7f0d391a3135515c93 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 5 May 2025 13:57:26 +0000 Subject: [PATCH 12/55] Use temp path --- tests/integration/test_xarray_constructor.py | 3 +-- tests/unit/schema/v1/test_template_builder.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_xarray_constructor.py b/tests/integration/test_xarray_constructor.py index 46585ab3..ffe349ac 100644 --- a/tests/integration/test_xarray_constructor.py +++ b/tests/integration/test_xarray_constructor.py @@ -133,8 +133,7 @@ def build_toy_dataset(): def test_to_mdio_zarr_writes_and_returns_xarray(tmp_path): ds_in = build_toy_dataset() - # store_path = tmp_path / "toy.zarr" - store_path = "test.mdio1" + store_path = tmp_path / "toy.mdio" # write to Zarr and get back xarray.Dataset ds_out = Write_MDIO_metadata(ds_in, str(store_path)) # global attributes 
should be present on the returned Dataset diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index e17783af..d2d166d7 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -200,7 +200,7 @@ def test_build_order_enforcement(): builder.build() -def test_toy_example(): +def test_toy_example(tmp_path): """Test building a toy dataset with multiple variables and attributes.""" dataset = (TemplateBuilder("campos_3d", attributes={ "textHeader": [ @@ -282,7 +282,7 @@ def test_toy_example(): # print(dataset.model_dump_json(indent=2)) - path = "test.mdio1" + path = tmp_path / "toy.mdio" Write_MDIO_metadata(dataset, path) # Verify dataset structure From cf1ea0174f89b2bf2f929f938dd4398e2dca31f6 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 5 May 2025 14:01:49 +0000 Subject: [PATCH 13/55] Rename source files --- src/mdio/core/v1/{xarray_constructor.py => constructor.py} | 0 .../{test_xarray_constructor.py => test_v1_constructor.py} | 2 +- tests/unit/schema/v1/test_template_builder.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/mdio/core/v1/{xarray_constructor.py => constructor.py} (100%) rename tests/integration/{test_xarray_constructor.py => test_v1_constructor.py} (98%) diff --git a/src/mdio/core/v1/xarray_constructor.py b/src/mdio/core/v1/constructor.py similarity index 100% rename from src/mdio/core/v1/xarray_constructor.py rename to src/mdio/core/v1/constructor.py diff --git a/tests/integration/test_xarray_constructor.py b/tests/integration/test_v1_constructor.py similarity index 98% rename from tests/integration/test_xarray_constructor.py rename to tests/integration/test_v1_constructor.py index ffe349ac..2b9af6a6 100644 --- a/tests/integration/test_xarray_constructor.py +++ b/tests/integration/test_v1_constructor.py @@ -13,7 +13,7 @@ make_dataset_metadata, make_dataset, ) -from mdio.core.v1.xarray_constructor import 
Write_MDIO_metadata +from mdio.core.v1.constructor import Write_MDIO_metadata def build_toy_dataset(): diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index d2d166d7..701c40aa 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -13,7 +13,7 @@ from mdio.schema.v1.variable import Variable, Coordinate from mdio.schema.compressors import Blosc, ZFP from mdio.schema.dtype import StructuredType -from mdio.core.v1.xarray_constructor import Write_MDIO_metadata +from mdio.core.v1.constructor import Write_MDIO_metadata def test_builder_initialization(): From 9386eb8c8c1f1d138624c0bfa012e2ddafdb98c7 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 5 May 2025 14:43:54 +0000 Subject: [PATCH 14/55] Update overloads --- src/mdio/core/v1/__init__.py | 31 ++++--------- src/mdio/core/v1/_overloads.py | 82 ++++++++++++++++++++++----------- src/mdio/core/v1/constructor.py | 68 ++++++++++++--------------- 3 files changed, 93 insertions(+), 88 deletions(-) diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index e3f9e8f2..b6d98d52 100644 --- a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -1,27 +1,12 @@ -# mdio/__init__.py +""" +MDIO core v1 package initialization. +Exposes the MDIO overloads and core v1 functionality. +""" -import xarray as _xr -from ._overloads import open_mdio, to_mdio +from ._overloads import mdio +from .constructor import Write_MDIO_metadata __all__ = [ - # explicit overrides / aliases - "open_mdio", - "to_mdio", - # everything else will be auto-populated by __dir__ / __getattr__ + "mdio", + "Write_MDIO_metadata", ] - -def __getattr__(name: str): - """ - Fallback: anything not defined in mdio/__init__.py - gets looked up on xarray. 
- """ - if hasattr(_xr, name): - return getattr(_xr, name) - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") - -def __dir__(): - """ - Make dir(mdio) list our overrides and then all public xarray names. - """ - xr_public = [n for n in dir(_xr) if not n.startswith("_")] - return sorted(__all__ + xr_public) diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py index 7085073c..1068a961 100644 --- a/src/mdio/core/v1/_overloads.py +++ b/src/mdio/core/v1/_overloads.py @@ -5,32 +5,62 @@ 1. To provide a consistent mdio.* naming scheme. 2. To simplify the API for users where it makes sense (e.g. MDIO v1 uses Zarr and not HDF5). """ +import xarray as xr +from xarray import Dataset as _Dataset, DataArray as _DataArray -import xarray as _xr -from xarray import Dataset as _Dataset, DataArray as _DataArray +class MDIODataset(_Dataset): + """xarray.Dataset subclass with MDIO v1 extensions.""" + __slots__ = () + + def to_mdio(self, store=None, *args, **kwargs): + """ + Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store. + """ + print("👋 hello world from mdio.to_mdio!") + return super().to_zarr(store=store, *args, **kwargs) + + +class MDIODataArray(_DataArray): + """xarray.DataArray subclass with MDIO v1 extensions.""" + __slots__ = () + + def to_mdio(self, store=None, *args, **kwargs): + """ + Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store. + """ + print("👋 hello world from mdio.to_mdio!") + return super().to_zarr(store=store, *args, **kwargs) + + +class MDIO: + """MDIO namespace for overloaded types and functions.""" + Dataset = MDIODataset + DataArray = MDIODataArray + + @staticmethod + def open(store, *args, engine="zarr", consolidated=False, **kwargs): + """ + Open a Zarr store as an MDIODataset. Prints a greeting and casts + the returned xarray.Dataset (and its variables) to the MDIO subclasses. 
+ """ + print("👋 hello world from mdio.open!") + ds = xr.open_dataset( + store, + engine=engine, + consolidated=consolidated, + *args, + **kwargs, + ) + # Cast Dataset to MDIODataset + ds.__class__ = MDIODataset + # Cast each DataArray in data_vars and coords + for name, var in ds.data_vars.items(): + var.__class__ = MDIODataArray + for name, coord in ds.coords.items(): + coord.__class__ = MDIODataArray + return ds + -def open_mdio(store, *args, engine="zarr", consolidated=False, **kwargs): - """ - Our mdio version of xr.open_zarr. Prints a greeting, - then calls xr.open_dataset(..., engine="zarr"). - """ - print("👋 hello world from mdio.open_mdio!") - return _xr.open_dataset(store, *args, - engine=engine, - consolidated=consolidated, - **kwargs) - -def to_mdio(self, *args, **kwargs): - """ - Alias for .to_zarr, renamed to .to_mdio, - so you get a consistent mdio.* naming. - """ - print("👋 hello world from mdio.to_mdio!") - print(f"kwargs: {kwargs}") - return self.to_zarr(*args, **kwargs) - -# Monkey-patch Dataset and DataArray so that you can do: -# ds.to_mdio(...) and arr.to_mdio(...) 
-_Dataset.to_mdio = to_mdio -_DataArray.to_mdio = to_mdio +# Create module-level MDIO namespace +mdio = MDIO() diff --git a/src/mdio/core/v1/constructor.py b/src/mdio/core/v1/constructor.py index a1c63cdd..332fd7ce 100644 --- a/src/mdio/core/v1/constructor.py +++ b/src/mdio/core/v1/constructor.py @@ -1,4 +1,4 @@ -"""Construct an Xarray Dataset from an MDIO v1 Dataset and write to Zarr.""" +"""Construct an MDIO dataset and write to Zarr.""" import xarray as xr import numpy as np import dask.array as da @@ -10,18 +10,19 @@ from mdio.schema.dtype import ScalarType, StructuredType from mdio.schema.compressors import Blosc, ZFP from mdio.schema.v1.variable import Coordinate - +from mdio.core.v1._overloads import mdio from numcodecs import Blosc as NumcodecsBlosc try: - import zfpy as BaseZFPY # Baser library + import zfpy as BaseZFPY # Base library from numcodecs import ZFPY as NumcodecsZFPY # Codec except ImportError: - print(f"Tried to import zfpy and numcodes zfpy but failed because {ImportError}") + print(f"Tried to import zfpy and numcodecs zfpy but failed because {ImportError}") BaseZFPY = None NumcodecsZFPY = None + def _convert_compressor(model: Blosc | ZFP | None) -> NumcodecsBlosc | NumcodecsZFPY | None: if isinstance(model, Blosc): return NumcodecsBlosc( @@ -45,7 +46,7 @@ def _convert_compressor(model: Blosc | ZFP | None) -> NumcodecsBlosc | Numcodecs raise TypeError(f"Unsupported compressor model: {type(model)}") -def _construct_mdio_dataset(mdio_ds: MDIODataset) -> xr.Dataset: +def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: """Build an MDIO dataset with correct dimensions and dtypes. 
This internal function constructs the underlying data structure for an MDIO dataset, @@ -65,7 +66,7 @@ def _construct_mdio_dataset(mdio_ds: MDIODataset) -> xr.Dataset: dims[d.name] = d.size # Build data variables - data_vars: dict[str, xr.DataArray] = {} + data_vars: dict[str, mdio.DataArray] = {} for var in mdio_ds.variables: dim_names = [d.name if isinstance(d, NamedDimension) else d for d in var.dimensions] shape = tuple(dims[name] for name in dim_names) @@ -76,12 +77,8 @@ def _construct_mdio_dataset(mdio_ds: MDIODataset) -> xr.Dataset: dtype = np.dtype([(f.name, f.format.value) for f in dt.fields]) else: raise TypeError(f"Unsupported data_type: {dt}") - # arr = da.zeros(shape, dtype=dtype) arr = np.zeros(shape, dtype=dtype) - data_array = xr.DataArray(arr, dims=dim_names) - # set default fill_value to zero instead of NaN - # TODO: This seems to be ignored by xarray. - # Setting in the _generate_encodings() function does work though. + data_array = mdio.DataArray(arr, dims=dim_names) data_array.encoding['fill_value'] = 0.0 # Set long_name if present @@ -90,33 +87,30 @@ def _construct_mdio_dataset(mdio_ds: MDIODataset) -> xr.Dataset: # Set coordinates if present, excluding dimension names if var.coordinates is not None: - # Get the set of dimension names for this variable dim_set = set(dim_names) - # Filter out any coordinates that are also dimensions coord_names = [ - c.name if isinstance(c, Coordinate) else c - for c in var.coordinates + c.name if isinstance(c, Coordinate) else c + for c in var.coordinates if (c.name if isinstance(c, Coordinate) else c) not in dim_set ] - if coord_names: # Only set coordinates if there are any non-dimension coordinates + if coord_names: data_array.attrs["coordinates"] = " ".join(coord_names) - # attach variable metadata into DataArray attributes, excluding nulls and chunkGrid + # Attach variable metadata into DataArray attributes if var.metadata is not None: md = var.metadata.model_dump( by_alias=True, exclude_none=True, 
exclude={"chunk_grid"}, ) - # Convert single-element lists to objects for key, value in md.items(): if isinstance(value, list) and len(value) == 1: md[key] = value[0] data_array.attrs.update(md) data_vars[var.name] = data_array - ds = xr.Dataset(data_vars) - # Attach metadata as attrs + ds = mdio.Dataset(data_vars) + # Attach dataset metadata ds.attrs["apiVersion"] = mdio_ds.metadata.api_version ds.attrs["createdOn"] = str(mdio_ds.metadata.created_on) ds.attrs["name"] = mdio_ds.metadata.name @@ -125,16 +119,13 @@ def _construct_mdio_dataset(mdio_ds: MDIODataset) -> xr.Dataset: return ds - - -def Write_MDIO_metadata(mdio_ds: MDIODataset, store: str, **kwargs: Any) -> xr.Dataset: - """Write MDIO metadata to a Zarr store and return the constructed xarray.Dataset. +def Write_MDIO_metadata(mdio_ds: MDIODataset, store: str, **kwargs: Any) -> mdio.Dataset: + """Write MDIO metadata to a Zarr store and return the constructed mdio.Dataset. - This function constructs an xarray.Dataset from the MDIO dataset and writes its metadata + This function constructs an mdio.Dataset from the MDIO dataset and writes its metadata to a Zarr store. The actual data is not written, only the metadata structure is created. """ ds = _construct_mdio_dataset(mdio_ds) - # Write to Zarr format v2 with consolidated metadata and all attributes def _generate_encodings() -> dict: """Generate encodings for each variable in the MDIO dataset. 
@@ -147,16 +138,13 @@ def _generate_encodings() -> dict: for var in mdio_ds.variables: fill_value = 0 if isinstance(var.data_type, StructuredType): - # Create a structured fill value that matches the dtype - # fill_value = np.zeros(1, dtype=[(f.name, f.format.value) for f in var.data_type.fields])[0] - # TODO: Re-enable this once xarray supports this PR https://github.com/zarr-developers/zarr-python/pull/3015 continue chunks = None if var.metadata is not None and var.metadata.chunk_grid is not None: chunks = var.metadata.chunk_grid.configuration.chunk_shape global_encodings[var.name] = { "chunks": chunks, - # TODO: Re-enable this once xarray supports this PR https://github.com/pydata/xarray/pull/10274 + # TODO: Re-enable chunk_key_encoding when supported by xarray # "chunk_key_encoding": dimension_separator_encoding, "_FillValue": fill_value, "dtype": var.data_type, @@ -164,12 +152,14 @@ def _generate_encodings() -> dict: } return global_encodings - ds.to_mdio(store, - mode="w", - zarr_format=2, - consolidated=True, - safe_chunks=False, # This ignores the Dask chunks - compute=False, # Ensures only the metadata is written - encoding=_generate_encodings(), - **kwargs) - return ds \ No newline at end of file + ds.to_mdio( + store, + mode="w", + zarr_format=2, + consolidated=True, + safe_chunks=False, + compute=False, + encoding=_generate_encodings(), + **kwargs + ) + return ds From df3d1483d9f18bda1d1cc8ef7478cd57d134cb5e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 6 May 2025 15:03:27 +0000 Subject: [PATCH 15/55] Linting --- noxfile.py | 2 +- src/mdio/core/v1/__init__.py | 9 +- src/mdio/core/v1/_overloads.py | 34 +-- src/mdio/core/v1/constructor.py | 80 ++++--- src/mdio/schemas/v1/template_builder.py | 133 +++++++----- src/mdio/schemas/v1/template_factory.py | 112 +++++++--- tests/integration/test_v1_constructor.py | 117 ++++++---- tests/unit/schema/v1/test_template_builder.py | 200 +++++++++++------- tests/unit/test_template_factory.py | 165 
++++++++++----- 9 files changed, 528 insertions(+), 324 deletions(-) diff --git a/noxfile.py b/noxfile.py index e9bb6408..759886c8 100644 --- a/noxfile.py +++ b/noxfile.py @@ -200,7 +200,7 @@ def tests(session: Session) -> None: "pygments", "pytest-dependency", "s3fs", - "zfpy" # TODO(BrianMichell): Ensure this is pulling from the pyproject.toml + "zfpy", # TODO(BrianMichell): Ensure this is pulling from the pyproject.toml ], ) diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index b6d98d52..e4be7f39 100644 --- a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -1,12 +1,13 @@ -""" -MDIO core v1 package initialization. +"""MDIO core v1 package initialization. + Exposes the MDIO overloads and core v1 functionality. """ from ._overloads import mdio -from .constructor import Write_MDIO_metadata +from .constructor import write_mdio_metadata + __all__ = [ "mdio", - "Write_MDIO_metadata", + "write_mdio_metadata", ] diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py index 1068a961..e38eec6a 100644 --- a/src/mdio/core/v1/_overloads.py +++ b/src/mdio/core/v1/_overloads.py @@ -1,63 +1,63 @@ -""" -Overloads for xarray. +"""Overloads for xarray. The intent of overloading here is: 1. To provide a consistent mdio.* naming scheme. 2. To simplify the API for users where it makes sense (e.g. MDIO v1 uses Zarr and not HDF5). """ + import xarray as xr -from xarray import Dataset as _Dataset, DataArray as _DataArray +from xarray import DataArray as _DataArray +from xarray import Dataset as _Dataset class MDIODataset(_Dataset): """xarray.Dataset subclass with MDIO v1 extensions.""" + __slots__ = () def to_mdio(self, store=None, *args, **kwargs): - """ - Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store. 
- """ + """Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store.""" print("👋 hello world from mdio.to_mdio!") - return super().to_zarr(store=store, *args, **kwargs) + return super().to_zarr(*args, store=store, **kwargs) class MDIODataArray(_DataArray): """xarray.DataArray subclass with MDIO v1 extensions.""" + __slots__ = () def to_mdio(self, store=None, *args, **kwargs): - """ - Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store. - """ + """Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store.""" print("👋 hello world from mdio.to_mdio!") - return super().to_zarr(store=store, *args, **kwargs) + return super().to_zarr(*args, store=store, **kwargs) class MDIO: """MDIO namespace for overloaded types and functions.""" + Dataset = MDIODataset DataArray = MDIODataArray @staticmethod def open(store, *args, engine="zarr", consolidated=False, **kwargs): - """ - Open a Zarr store as an MDIODataset. Prints a greeting and casts - the returned xarray.Dataset (and its variables) to the MDIO subclasses. + """Open a Zarr store as an MDIODataset. + + Casts the returned xarray.Dataset (and its variables) to the MDIO subclasses. 
""" print("👋 hello world from mdio.open!") ds = xr.open_dataset( store, + *args, engine=engine, consolidated=consolidated, - *args, **kwargs, ) # Cast Dataset to MDIODataset ds.__class__ = MDIODataset # Cast each DataArray in data_vars and coords - for name, var in ds.data_vars.items(): + for _name, var in ds.data_vars.items(): var.__class__ = MDIODataArray - for name, coord in ds.coords.items(): + for _name, coord in ds.coords.items(): coord.__class__ = MDIODataArray return ds diff --git a/src/mdio/core/v1/constructor.py b/src/mdio/core/v1/constructor.py index 332fd7ce..76af2b35 100644 --- a/src/mdio/core/v1/constructor.py +++ b/src/mdio/core/v1/constructor.py @@ -1,40 +1,44 @@ """Construct an MDIO dataset and write to Zarr.""" -import xarray as xr -import numpy as np -import dask.array as da -from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + from typing import Any -from mdio.schema.v1.dataset import Dataset as MDIODataset +import numpy as np +from numcodecs import Blosc as NumcodecsBlosc +from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding # noqa: F401 + +from mdio.core.v1._overloads import mdio +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc from mdio.schema.dimension import NamedDimension -from mdio.schema.dtype import ScalarType, StructuredType -from mdio.schema.compressors import Blosc, ZFP +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType +from mdio.schema.v1.dataset import Dataset as MDIODataset from mdio.schema.v1.variable import Coordinate -from mdio.core.v1._overloads import mdio -from numcodecs import Blosc as NumcodecsBlosc try: - import zfpy as BaseZFPY # Base library - from numcodecs import ZFPY as NumcodecsZFPY # Codec + import zfpy as zfpy_base # Base library + from numcodecs import ZFPY # Codec except ImportError: print(f"Tried to import zfpy and numcodecs zfpy but failed because {ImportError}") - BaseZFPY = None - NumcodecsZFPY = None + zfpy_base = 
None + ZFPY = None -def _convert_compressor(model: Blosc | ZFP | None) -> NumcodecsBlosc | NumcodecsZFPY | None: +def _convert_compressor( + model: Blosc | ZFP | None, +) -> NumcodecsBlosc | ZFPY | None: if isinstance(model, Blosc): return NumcodecsBlosc( cname=model.algorithm.value, clevel=model.level, shuffle=model.shuffle.value, - blocksize=model.blocksize if model.blocksize > 0 else 0 + blocksize=model.blocksize if model.blocksize > 0 else 0, ) elif isinstance(model, ZFP): - if BaseZFPY is None or NumcodecsZFPY is None: + if zfpy_base is None or ZFPY is None: raise ImportError("zfpy and numcodecs are required to use ZFP compression") - return NumcodecsZFPY( + return ZFPY( mode=model.mode.value, tolerance=model.tolerance, rate=model.rate, @@ -46,17 +50,20 @@ def _convert_compressor(model: Blosc | ZFP | None) -> NumcodecsBlosc | Numcodecs raise TypeError(f"Unsupported compressor model: {type(model)}") -def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: +def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: # noqa: C901 """Build an MDIO dataset with correct dimensions and dtypes. - + This internal function constructs the underlying data structure for an MDIO dataset, handling dimension mapping, data types, and metadata organization. - + Args: mdio_ds: The source MDIO dataset to construct from. - + Returns: The constructed dataset with proper MDIO structure and metadata. + + Raises: + TypeError: If an unsupported data type is encountered. 
""" # Collect dimension sizes dims: dict[str, int] = {} @@ -68,7 +75,9 @@ def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: # Build data variables data_vars: dict[str, mdio.DataArray] = {} for var in mdio_ds.variables: - dim_names = [d.name if isinstance(d, NamedDimension) else d for d in var.dimensions] + dim_names = [ + d.name if isinstance(d, NamedDimension) else d for d in var.dimensions + ] shape = tuple(dims[name] for name in dim_names) dt = var.data_type if isinstance(dt, ScalarType): @@ -79,23 +88,23 @@ def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: raise TypeError(f"Unsupported data_type: {dt}") arr = np.zeros(shape, dtype=dtype) data_array = mdio.DataArray(arr, dims=dim_names) - data_array.encoding['fill_value'] = 0.0 - + data_array.encoding["fill_value"] = 0.0 + # Set long_name if present if var.long_name is not None: data_array.attrs["long_name"] = var.long_name - + # Set coordinates if present, excluding dimension names if var.coordinates is not None: dim_set = set(dim_names) coord_names = [ - c.name if isinstance(c, Coordinate) else c - for c in var.coordinates + c.name if isinstance(c, Coordinate) else c + for c in var.coordinates if (c.name if isinstance(c, Coordinate) else c) not in dim_set ] if coord_names: data_array.attrs["coordinates"] = " ".join(coord_names) - + # Attach variable metadata into DataArray attributes if var.metadata is not None: md = var.metadata.model_dump( @@ -119,21 +128,24 @@ def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: return ds -def Write_MDIO_metadata(mdio_ds: MDIODataset, store: str, **kwargs: Any) -> mdio.Dataset: +def write_mdio_metadata( + mdio_ds: MDIODataset, store: str, **kwargs: Any +) -> mdio.Dataset: """Write MDIO metadata to a Zarr store and return the constructed mdio.Dataset. - + This function constructs an mdio.Dataset from the MDIO dataset and writes its metadata to a Zarr store. The actual data is not written, only the metadata structure is created. 
""" ds = _construct_mdio_dataset(mdio_ds) - + def _generate_encodings() -> dict: """Generate encodings for each variable in the MDIO dataset. - + Returns: Dictionary mapping variable names to their encoding configurations. """ - dimension_separator_encoding = V2ChunkKeyEncoding(separator="/").to_dict() + # TODO: Re-enable chunk_key_encoding when supported by xarray + # dimension_separator_encoding = V2ChunkKeyEncoding(separator="/").to_dict() global_encodings = {} for var in mdio_ds.variables: fill_value = 0 @@ -160,6 +172,6 @@ def _generate_encodings() -> dict: safe_chunks=False, compute=False, encoding=_generate_encodings(), - **kwargs + **kwargs, ) return ds diff --git a/src/mdio/schemas/v1/template_builder.py b/src/mdio/schemas/v1/template_builder.py index 85baf95e..0f8d4e45 100644 --- a/src/mdio/schemas/v1/template_builder.py +++ b/src/mdio/schemas/v1/template_builder.py @@ -1,34 +1,45 @@ """Builder pattern implementation for MDIO v1 schema models.""" -from datetime import datetime, timezone -from typing import Any, Optional, List, Dict, Union -from enum import Enum, auto +from datetime import datetime +from datetime import timezone +from enum import Enum +from enum import auto +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Union from pydantic import AwareDatetime +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc from mdio.schema.dimension import NamedDimension -from mdio.schema.compressors import Blosc, ZFP -from mdio.schema.dtype import ScalarType, StructuredType +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType from mdio.schema.metadata import UserAttributes +from mdio.schema.v1.dataset import Dataset +from mdio.schema.v1.dataset import DatasetMetadata +from mdio.schema.v1.template_factory import make_coordinate +from mdio.schema.v1.template_factory import make_dataset +from mdio.schema.v1.template_factory 
import make_dataset_metadata +from mdio.schema.v1.template_factory import make_named_dimension +from mdio.schema.v1.template_factory import make_variable from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.dataset import Dataset, DatasetMetadata -from mdio.schema.v1.variable import Variable, Coordinate, VariableMetadata -from mdio.schema.v1.template_factory import ( - make_named_dimension, - make_coordinate, - make_variable, - make_dataset_metadata, - make_dataset, -) +from mdio.schema.v1.variable import Coordinate +from mdio.schema.v1.variable import Variable +from mdio.schema.v1.variable import VariableMetadata class _BuilderState(Enum): """States for the template builder.""" + INITIAL = auto() HAS_DIMENSIONS = auto() HAS_COORDINATES = auto() HAS_VARIABLES = auto() + class TemplateBuilder: """Builder for creating MDIO datasets with enforced build order: 1. Must add dimensions first via add_dimension() @@ -36,6 +47,7 @@ class TemplateBuilder: 3. Must add variables via add_variable() 4. Must call build() to create the dataset """ + def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None): self.name = name self.api_version = "1.0.0" # TODO: Pull from package metadata @@ -47,14 +59,16 @@ def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None): self._state = _BuilderState.INITIAL self._unnamed_variable_counter = 0 - def add_dimension(self, + def add_dimension( + self, name: str, size: int, long_name: str = None, data_type: ScalarType | StructuredType = ScalarType.INT32, - metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None) -> 'TemplateBuilder': + metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, + ) -> "TemplateBuilder": """Add a dimension. This must be called at least once before adding coordinates or variables. 
- + Args: name: Name of the dimension size: Size of the dimension @@ -65,38 +79,42 @@ def add_dimension(self, # Create the dimension dimension = make_named_dimension(name, size) self._dimensions.append(dimension) - + # Create a variable for the dimension dim_var = make_variable( name=name, long_name=long_name, dimensions=[dimension], data_type=data_type, - metadata=metadata + metadata=metadata, ) self._variables.append(dim_var) - + self._state = _BuilderState.HAS_DIMENSIONS return self - def add_coordinate(self, + def add_coordinate( + self, name: str = "", *, long_name: str = None, dimensions: List[NamedDimension | str] = [], data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None) -> 'TemplateBuilder': + metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, + ) -> "TemplateBuilder": """Add a coordinate after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: - raise ValueError("Must add at least one dimension before adding coordinates") - + raise ValueError( + "Must add at least one dimension before adding coordinates" + ) + if name == "": name = f"coord_{len(self._coordinates)}" if dimensions == []: dimensions = self._dimensions if isinstance(metadata, dict): metadata = [metadata] - + # Convert string dimension names to NamedDimension objects dim_objects = [] for dim in dimensions: @@ -107,18 +125,21 @@ def add_coordinate(self, dim_objects.append(dim_obj) else: dim_objects.append(dim) - - self._coordinates.append(make_coordinate( - name=name, - long_name=long_name, - dimensions=dim_objects, - data_type=data_type, - metadata=metadata - )) + + self._coordinates.append( + make_coordinate( + name=name, + long_name=long_name, + dimensions=dim_objects, + data_type=data_type, + metadata=metadata, + ) + ) self._state = _BuilderState.HAS_COORDINATES return self - def add_variable(self, + def add_variable( + self, name: str = "", *, long_name: 
str = None, @@ -126,17 +147,18 @@ def add_variable(self, data_type: ScalarType | StructuredType = ScalarType.FLOAT32, compressor: Blosc | ZFP | None = None, coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[VariableMetadata] = None) -> 'TemplateBuilder': + metadata: Optional[VariableMetadata] = None, + ) -> "TemplateBuilder": """Add a variable after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: raise ValueError("Must add at least one dimension before adding variables") - + if name == "": name = f"var_{self._unnamed_variable_counter}" self._unnamed_variable_counter += 1 if dimensions == []: dimensions = self._dimensions - + # Convert string dimension names to NamedDimension objects dim_objects = [] for dim in dimensions: @@ -147,16 +169,18 @@ def add_variable(self, dim_objects.append(dim_obj) else: dim_objects.append(dim) - - self._variables.append(make_variable( - name=name, - long_name=long_name, - dimensions=dim_objects, - data_type=data_type, - compressor=compressor, - coordinates=coordinates, - metadata=metadata - )) + + self._variables.append( + make_variable( + name=name, + long_name=long_name, + dimensions=dim_objects, + data_type=data_type, + compressor=compressor, + coordinates=coordinates, + metadata=metadata, + ) + ) self._state = _BuilderState.HAS_VARIABLES return self @@ -164,14 +188,11 @@ def build(self) -> Dataset: """Build the final dataset.""" if self._state == _BuilderState.INITIAL: raise ValueError("Must add at least one dimension before building") - + metadata = make_dataset_metadata( - self.name, - self.api_version, - self.created_on, - self.attributes + self.name, self.api_version, self.created_on, self.attributes ) - + # Add coordinates as variables to the dataset # We make a copy so that coordinates are not duplicated if the builder is reused all_variables = self._variables.copy() @@ -182,8 +203,8 @@ def build(self) -> Dataset: long_name=coord.long_name, dimensions=coord.dimensions, 
data_type=coord.data_type, - metadata=coord.metadata + metadata=coord.metadata, ) all_variables.append(coord_var) - - return make_dataset(all_variables, metadata) \ No newline at end of file + + return make_dataset(all_variables, metadata) diff --git a/src/mdio/schemas/v1/template_factory.py b/src/mdio/schemas/v1/template_factory.py index abcbcc17..79df3558 100644 --- a/src/mdio/schemas/v1/template_factory.py +++ b/src/mdio/schemas/v1/template_factory.py @@ -1,17 +1,24 @@ """Factory methods for MDIO v1 schema models.""" -from datetime import datetime, timezone -from typing import Any, Optional, Union, List, Dict - -from pydantic import AwareDatetime - +from datetime import datetime +from datetime import timezone +from typing import Any +from typing import Dict +from typing import List +from typing import Optional + +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc from mdio.schema.dimension import NamedDimension -from mdio.schema.compressors import Blosc, ZFP -from mdio.schema.dtype import ScalarType, StructuredType +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType from mdio.schema.metadata import UserAttributes +from mdio.schema.v1.dataset import Dataset +from mdio.schema.v1.dataset import DatasetMetadata from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.dataset import Dataset, DatasetMetadata -from mdio.schema.v1.variable import Variable, Coordinate, VariableMetadata +from mdio.schema.v1.variable import Coordinate +from mdio.schema.v1.variable import Variable +from mdio.schema.v1.variable import VariableMetadata def make_named_dimension(name: str, size: int) -> NamedDimension: @@ -36,16 +43,34 @@ def make_coordinate( ) -def make_variable( +def make_variable( # noqa: C901 name: str, dimensions: List[NamedDimension | str], data_type: ScalarType | StructuredType, long_name: str = None, compressor: Blosc | ZFP | None = None, coordinates: Optional[List[Coordinate | str]] = None, - 
metadata: Optional[List[AllUnits | UserAttributes] | Dict[str, Any] | VariableMetadata] = None, + metadata: Optional[ + List[AllUnits | UserAttributes] | Dict[str, Any] | VariableMetadata + ] = None, ) -> Variable: - """Create a Variable with the given name, dimensions, data_type, compressor, coordinates, and metadata.""" + """Create a Variable with the given parameters. + + Args: + name: Name of the variable + dimensions: List of dimensions + data_type: Data type of the variable + long_name: Optional long name + compressor: Optional compressor + coordinates: Optional list of coordinates + metadata: Optional metadata + + Returns: + Variable: A Variable instance with the specified parameters. + + Raises: + TypeError: If the metadata type is not supported. + """ # Convert metadata to VariableMetadata if needed var_metadata = None if metadata: @@ -100,7 +125,7 @@ def make_variable( def make_dataset_metadata( name: str, api_version: str, - created_on: AwareDatetime, + created_on: datetime, attributes: Optional[Dict[str, Any]] = None, ) -> DatasetMetadata: """Create a DatasetMetadata with name, api_version, created_on, and optional attributes.""" @@ -124,58 +149,79 @@ def make_dataset( class AbstractTemplateFactory: + """Abstract factory for creating MDIO datasets.""" + + def __init__(self, name: str): + """Initialize the factory. 
- def __init__(self, name: str): + Args: + name: Name of the dataset + """ self.name = name self.api_version = "1.0.0" # TODO: Pull from package metadata self.created_on = datetime.now(timezone.utc) + self.dimensions: List[NamedDimension] = [] + self.coordinates: List[Coordinate] = [] + self.variables: List[Variable] = [] - - def AddDimension(self, name: str, size: int) -> 'AbstractTemplateFactory': + def add_dimension(self, name: str, size: int) -> "AbstractTemplateFactory": """Add a dimension to the factory.""" self.dimensions.append(make_named_dimension(name, size)) return self - - def AddCoordinate(self, + def add_coordinate( + self, name: str = "", - dimensions: List[NamedDimension | str] = [], + dimensions: Optional[List[NamedDimension | str]] = None, data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - metadata: Optional[List[AllUnits | UserAttributes]] = None) -> 'AbstractTemplateFactory': + metadata: Optional[List[AllUnits | UserAttributes]] = None, + ) -> "AbstractTemplateFactory": """Add a coordinate to the factory.""" if name == "": name = f"coord_{len(self.coordinates)}" - if dimensions == []: + if dimensions is None: dimensions = self.dimensions self.coordinates.append(make_coordinate(name, dimensions, data_type, metadata)) return self - def AddVariable(self, name: str = "", - dimensions: List[NamedDimension | str] = [], - data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - compressor: Blosc | ZFP | None = None, - coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[VariableMetadata] = None) -> 'AbstractTemplateFactory': + def add_variable( + self, + name: str = "", + dimensions: Optional[List[NamedDimension | str]] = None, + data_type: ScalarType | StructuredType = ScalarType.FLOAT32, + compressor: Blosc | ZFP | None = None, + coordinates: Optional[List[Coordinate | str]] = None, + metadata: Optional[VariableMetadata] = None, + ) -> "AbstractTemplateFactory": """Add a variable to the factory.""" if name == 
"": name = f"var_{len(self.variables)}" - if dimensions == []: + if dimensions is None: dimensions = self.dimensions - self.variables.append(make_variable(name, dimensions, data_type, compressor, coordinates, metadata)) + self.variables.append( + make_variable( + name, dimensions, data_type, compressor, coordinates, metadata + ) + ) return self def _compose_metadata(self) -> DatasetMetadata: """Compose the DatasetMetadata with the given name, api_version, and created_on.""" return make_dataset_metadata(self.name, self.api_version, self.created_on) - def _compose_variables(self) -> List[Variable]: - """Compose the Variables with the given name, dimensions, data_type, compressor, coordinates, and metadata.""" + """Compose the Variables with the given parameters.""" return [ - make_variable(self.name, self.dimensions, self.data_type, self.compressor, self.coordinates, self.metadata) + make_variable( + self.name, + self.dimensions, + self.data_type, + self.compressor, + self.coordinates, + self.metadata, + ) ] - def make_dataset(self, variables: List[Variable]) -> Dataset: """Create a Dataset with the given variables and metadata.""" return Dataset(variables=variables, metadata=self._compose_metadata()) diff --git a/tests/integration/test_v1_constructor.py b/tests/integration/test_v1_constructor.py index 2b9af6a6..81b49479 100644 --- a/tests/integration/test_v1_constructor.py +++ b/tests/integration/test_v1_constructor.py @@ -1,22 +1,22 @@ """Integration test for MDIO v1 Xarray Zarr constructor.""" -import pytest -import zarr + +from datetime import datetime + import numpy as np -from pathlib import Path -from datetime import datetime, timezone -from mdio.schema.dtype import ScalarType, StructuredType -from mdio.schema.compressors import Blosc, ZFP -from mdio.schema.v1.template_factory import ( - make_named_dimension, - make_variable, - make_dataset_metadata, - make_dataset, -) -from mdio.core.v1.constructor import Write_MDIO_metadata +from mdio.core.v1.constructor 
import write_mdio_metadata +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType +from mdio.schema.v1.template_factory import make_dataset +from mdio.schema.v1.template_factory import make_dataset_metadata +from mdio.schema.v1.template_factory import make_named_dimension +from mdio.schema.v1.template_factory import make_variable def build_toy_dataset(): + """Build a toy dataset for testing.""" # core dimensions inline = make_named_dimension("inline", 256) crossline = make_named_dimension("crossline", 512) @@ -32,10 +32,10 @@ def build_toy_dataset(): "textHeader": [ "C01 .......................... ", "C02 .......................... ", - "C03 .......................... " + "C03 .......................... ", ], - "foo": "bar" - } + "foo": "bar", + }, ) # Image variable @@ -46,17 +46,20 @@ def build_toy_dataset(): compressor=Blosc(algorithm="zstd"), coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], metadata={ - "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [128, 128, 128]}, + }, "statsV1": { "count": 100, "sum": 1215.1, "sumSquares": 125.12, "min": 5.61, "max": 10.84, - "histogram": {"binCenters": [1, 2], "counts": [10, 15]} + "histogram": {"binCenters": [1, 2], "counts": [10, 15]}, }, - "attributes": {"fizz": "buzz"} - } + "attributes": {"fizz": "buzz"}, + }, ) # Velocity variable @@ -67,9 +70,12 @@ def build_toy_dataset(): compressor=None, coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], metadata={ - "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, - "unitsV1": {"speed": "m/s"} - } + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [128, 128, 128]}, + }, + "unitsV1": {"speed": "m/s"}, + }, ) # Inline-optimized image variable @@ -79,63 +85,92 @@ def 
build_toy_dataset(): data_type=ScalarType.FLOAT32, compressor=ZFP(mode="fixed_accuracy", tolerance=0.05), coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], - metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [4, 512, 512]}}} + metadata={ + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [4, 512, 512]}, + } + }, ) # Headers variable with structured dtype - headers_dtype = StructuredType(fields=[ - {"name": "cdp-x", "format": ScalarType.INT32}, - {"name": "cdp-y", "format": ScalarType.INT32}, - {"name": "elevation", "format": ScalarType.FLOAT16}, - {"name": "some_scalar", "format": ScalarType.FLOAT16} - ]) + headers_dtype = StructuredType( + fields=[ + {"name": "cdp-x", "format": ScalarType.INT32}, + {"name": "cdp-y", "format": ScalarType.INT32}, + {"name": "elevation", "format": ScalarType.FLOAT16}, + {"name": "some_scalar", "format": ScalarType.FLOAT16}, + ] + ) image_headers = make_variable( name="image_headers", dimensions=[inline, crossline], data_type=headers_dtype, compressor=None, coordinates=["inline", "crossline", "cdp-x", "cdp-y"], - metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128]}}} + metadata={ + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [128, 128]}, + } + }, ) # Standalone dimension variables - inline_var = make_variable(name="inline", dimensions=[inline], data_type=ScalarType.UINT32, compressor=None) - crossline_var = make_variable(name="crossline", dimensions=[crossline], data_type=ScalarType.UINT32, compressor=None) + inline_var = make_variable( + name="inline", dimensions=[inline], data_type=ScalarType.UINT32, compressor=None + ) + crossline_var = make_variable( + name="crossline", + dimensions=[crossline], + data_type=ScalarType.UINT32, + compressor=None, + ) depth_var = make_variable( name="depth", dimensions=[depth], data_type=ScalarType.UINT32, compressor=None, - metadata={"unitsV1": {"length": "m"}} + 
metadata={"unitsV1": {"length": "m"}}, ) cdp_x = make_variable( name="cdp-x", dimensions=[inline, crossline], data_type=ScalarType.FLOAT32, compressor=None, - metadata={"unitsV1": {"length": "m"}} + metadata={"unitsV1": {"length": "m"}}, ) cdp_y = make_variable( name="cdp-y", dimensions=[inline, crossline], data_type=ScalarType.FLOAT32, compressor=None, - metadata={"unitsV1": {"length": "m"}} + metadata={"unitsV1": {"length": "m"}}, ) # Compose full dataset return make_dataset( - [image, velocity, image_inline, image_headers, - inline_var, crossline_var, depth_var, cdp_x, cdp_y], - meta + [ + image, + velocity, + image_inline, + image_headers, + inline_var, + crossline_var, + depth_var, + cdp_x, + cdp_y, + ], + meta, ) -def test_to_mdio_zarr_writes_and_returns_xarray(tmp_path): +def test_to_mdio_writes_and_returns_mdio(tmp_path): + """Test that to_mdio writes and returns an mdio.Dataset.""" ds_in = build_toy_dataset() store_path = tmp_path / "toy.mdio" # write to Zarr and get back xarray.Dataset - ds_out = Write_MDIO_metadata(ds_in, str(store_path)) + ds_out = write_mdio_metadata(ds_in, str(store_path)) # global attributes should be present on the returned Dataset assert ds_out.attrs["apiVersion"] == ds_in.metadata.api_version assert ds_out.attrs["createdOn"] == str(ds_in.metadata.created_on) diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index 701c40aa..4a145922 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -1,19 +1,16 @@ """Unit tests for the MDIO v1 template builder.""" -import pytest from datetime import datetime -from mdio.schema.dimension import NamedDimension +import pytest + +from mdio.core.v1.constructor import write_mdio_metadata +from mdio.schema.compressors import Blosc from mdio.schema.dtype import ScalarType -from mdio.schema.v1.template_builder import ( - TemplateBuilder, - _BuilderState -) -from mdio.schema.v1.dataset 
import Dataset -from mdio.schema.v1.variable import Variable, Coordinate -from mdio.schema.compressors import Blosc, ZFP from mdio.schema.dtype import StructuredType -from mdio.core.v1.constructor import Write_MDIO_metadata +from mdio.schema.v1.dataset import Dataset +from mdio.schema.v1.template_builder import TemplateBuilder +from mdio.schema.v1.template_builder import _BuilderState def test_builder_initialization(): @@ -31,7 +28,7 @@ def test_builder_initialization(): def test_dimension_builder_state(): """Test dimension builder state transitions and functionality.""" builder = TemplateBuilder("test_dataset") - + # First dimension should change state to HAS_DIMENSIONS and create a variable builder = builder.add_dimension("x", 100, long_name="X Dimension") assert builder._state == _BuilderState.HAS_DIMENSIONS @@ -59,15 +56,15 @@ def test_dimension_builder_state(): def test_dimension_with_metadata(): """Test adding dimensions with custom metadata.""" builder = TemplateBuilder("test_dataset") - + # Add dimension with custom metadata builder = builder.add_dimension( "depth", size=100, data_type=ScalarType.FLOAT32, - metadata={"unitsV1": {"length": "m"}} + metadata={"unitsV1": {"length": "m"}}, ) - + assert len(builder._variables) == 1 depth_var = builder._variables[0] assert depth_var.name == "depth" @@ -78,17 +75,21 @@ def test_dimension_with_metadata(): def test_coordinate_builder_state(): """Test coordinate builder state transitions and functionality.""" builder = TemplateBuilder("test_dataset") - + # Should not be able to add coordinates before dimensions - with pytest.raises(ValueError, match="Must add at least one dimension before adding coordinates"): + with pytest.raises( + ValueError, match="Must add at least one dimension before adding coordinates" + ): builder.add_coordinate("x_coord", dimensions=["x"]) - + # Add dimensions first builder = builder.add_dimension("x", 100) builder = builder.add_dimension("y", 200) - + # Adding coordinate should change state 
to HAS_COORDINATES - builder = builder.add_coordinate("x_coord", dimensions=["x"], long_name="X Coordinate") + builder = builder.add_coordinate( + "x_coord", dimensions=["x"], long_name="X Coordinate" + ) assert builder._state == _BuilderState.HAS_COORDINATES assert len(builder._coordinates) == 1 assert builder._coordinates[0].name == "x_coord" @@ -106,14 +107,16 @@ def test_coordinate_builder_state(): def test_variable_builder_state(): """Test variable builder state transitions and functionality.""" builder = TemplateBuilder("test_dataset") - + # Should not be able to add variables before dimensions - with pytest.raises(ValueError, match="Must add at least one dimension before adding variables"): + with pytest.raises( + ValueError, match="Must add at least one dimension before adding variables" + ): builder.add_variable("data", dimensions=["x"]) - + # Add dimension first builder = builder.add_dimension("x", 100) - + # Adding variable should change state to HAS_VARIABLES builder = builder.add_variable("data", dimensions=["x"], long_name="Data Variable") assert builder._state == _BuilderState.HAS_VARIABLES @@ -132,14 +135,16 @@ def test_variable_builder_state(): def test_build_dataset(): """Test building a complete dataset.""" - dataset = (TemplateBuilder("test_dataset") + dataset = ( + TemplateBuilder("test_dataset") .add_dimension("x", 100) .add_dimension("y", 200) .add_coordinate("x_coord", dimensions=["x"]) .add_coordinate("y_coord", dimensions=["y"]) .add_variable("data", dimensions=["x", "y"], long_name="Test Data") - .build()) - + .build() + ) + assert isinstance(dataset, Dataset) assert dataset.metadata.name == "test_dataset" # Two dimension variables + one data variable + two coordinate variables @@ -153,14 +158,16 @@ def test_build_dataset(): def test_auto_naming(): """Test automatic naming of coordinates and variables.""" - dataset = (TemplateBuilder("test_dataset") + dataset = ( + TemplateBuilder("test_dataset") .add_dimension("x", 100) .add_coordinate() 
# Should be named "coord_0" .add_coordinate() # Should be named "coord_1" - .add_variable() # Should be named "var_0" - .add_variable() # Should be named "var_1" - .build()) - + .add_variable() # Should be named "var_0" + .add_variable() # Should be named "var_1" + .build() + ) + assert dataset.variables[0].name == "x" # Dimension variable assert dataset.variables[1].name == "var_0" assert dataset.variables[2].name == "var_1" @@ -168,12 +175,14 @@ def test_auto_naming(): def test_default_dimensions(): """Test that coordinates and variables use all dimensions by default.""" - dataset = (TemplateBuilder("test_dataset") + dataset = ( + TemplateBuilder("test_dataset") .add_dimension("x", 100) .add_dimension("y", 200) .add_coordinate() # Should use both x and y dimensions - .add_variable() # Should use both x and y dimensions - .build()) + .add_variable() # Should use both x and y dimensions + .build() + ) # Two dimension variables + one data variable + one coordinate variable assert len(dataset.variables) == 4 @@ -186,39 +195,60 @@ def test_default_dimensions(): def test_build_order_enforcement(): """Test that the builder enforces the correct build order.""" builder = TemplateBuilder("test_dataset") - + # Should not be able to add coordinates before dimensions - with pytest.raises(ValueError, match="Must add at least one dimension before adding coordinates"): + with pytest.raises( + ValueError, match="Must add at least one dimension before adding coordinates" + ): builder.add_coordinate("x_coord", dimensions=["x"]) - + # Should not be able to add variables before dimensions - with pytest.raises(ValueError, match="Must add at least one dimension before adding variables"): + with pytest.raises( + ValueError, match="Must add at least one dimension before adding variables" + ): builder.add_variable("data", dimensions=["x"]) - + # Should not be able to build without dimensions - with pytest.raises(ValueError, match="Must add at least one dimension before building"): + with 
pytest.raises( + ValueError, match="Must add at least one dimension before building" + ): builder.build() def test_toy_example(tmp_path): """Test building a toy dataset with multiple variables and attributes.""" - dataset = (TemplateBuilder("campos_3d", attributes={ - "textHeader": [ - "C01 .......................... ", - "C02 .......................... ", - "C03 .......................... " - ], - "foo": "bar" - }) + dataset = ( + TemplateBuilder( + "campos_3d", + attributes={ + "textHeader": [ + "C01 .......................... ", + "C02 .......................... ", + "C03 .......................... ", + ], + "foo": "bar", + }, + ) # Add dimensions .add_dimension("inline", 256, data_type=ScalarType.UINT32) .add_dimension("crossline", 512, data_type=ScalarType.UINT32) - .add_dimension("depth", 384, data_type=ScalarType.UINT32, metadata={"unitsV1": {"length": "m"}}) - + .add_dimension( + "depth", + 384, + data_type=ScalarType.UINT32, + metadata={"unitsV1": {"length": "m"}}, + ) # Add coordinates - .add_coordinate("cdp-x", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}}) - .add_coordinate("cdp-y", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}}) - + .add_coordinate( + "cdp-x", + dimensions=["inline", "crossline"], + metadata={"unitsV1": {"length": "m"}}, + ) + .add_coordinate( + "cdp-y", + dimensions=["inline", "crossline"], + metadata={"unitsV1": {"length": "m"}}, + ) # Add image variable .add_variable( name="image", @@ -227,19 +257,21 @@ def test_toy_example(tmp_path): compressor=Blosc(algorithm="zstd"), coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], metadata={ - "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [128, 128, 128]}, + }, "statsV1": { "count": 100, "sum": 1215.1, "sumSquares": 125.12, "min": 5.61, "max": 10.84, - "histogram": {"binCenters": [1, 2], "counts": [10, 15]} + 
"histogram": {"binCenters": [1, 2], "counts": [10, 15]}, }, - "attributes": {"fizz": "buzz"} - } + "attributes": {"fizz": "buzz"}, + }, ) - # Add velocity variable .add_variable( name="velocity", @@ -247,11 +279,13 @@ def test_toy_example(tmp_path): data_type=ScalarType.FLOAT16, coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], metadata={ - "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, - "unitsV1": {"speed": "m/s"} - } + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [128, 128, 128]}, + }, + "unitsV1": {"speed": "m/s"}, + }, ) - # Add inline-optimized image variable .add_variable( name="image_inline", @@ -261,69 +295,73 @@ def test_toy_example(tmp_path): compressor=Blosc(algorithm="zstd"), coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], metadata={ - "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [4, 512, 512]}} - } + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [4, 512, 512]}, + } + }, ) - # Add headers variable with structured dtype .add_variable( name="image_headers", dimensions=["inline", "crossline"], - data_type=StructuredType(fields=[ - {"name": "cdp-x", "format": ScalarType.INT32}, - {"name": "cdp-y", "format": ScalarType.INT32}, - {"name": "elevation", "format": ScalarType.FLOAT16}, - {"name": "some_scalar", "format": ScalarType.FLOAT16} - ]), + data_type=StructuredType( + fields=[ + {"name": "cdp-x", "format": ScalarType.INT32}, + {"name": "cdp-y", "format": ScalarType.INT32}, + {"name": "elevation", "format": ScalarType.FLOAT16}, + {"name": "some_scalar", "format": ScalarType.FLOAT16}, + ] + ), coordinates=["inline", "crossline", "cdp-x", "cdp-y"], - # metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128]}}} ) - .build()) + .build() + ) # print(dataset.model_dump_json(indent=2)) path = tmp_path / "toy.mdio" - Write_MDIO_metadata(dataset, path) + write_mdio_metadata(dataset, path) # Verify 
dataset structure assert dataset.metadata.name == "campos_3d" assert dataset.metadata.api_version == "1.0.0" assert dataset.metadata.attributes["foo"] == "bar" assert len(dataset.metadata.attributes["textHeader"]) == 3 - + # Verify variables (including dimension variables) # 3 dimension variables + 4 data variables + 2 coordinate variables assert len(dataset.variables) == 9 - + # Verify dimension variables inline_var = next(v for v in dataset.variables if v.name == "inline") assert inline_var.data_type == ScalarType.UINT32 assert len(inline_var.dimensions) == 1 assert inline_var.dimensions[0].name == "inline" - + depth_var = next(v for v in dataset.variables if v.name == "depth") assert depth_var.data_type == ScalarType.UINT32 assert depth_var.metadata.units_v1.length == "m" - + # Verify image variable image = next(v for v in dataset.variables if v.name == "image") assert image.data_type == ScalarType.FLOAT32 assert isinstance(image.compressor, Blosc) assert image.compressor.algorithm == "zstd" assert image.metadata.stats_v1.count == 100 - + # Verify velocity variable velocity = next(v for v in dataset.variables if v.name == "velocity") assert velocity.data_type == ScalarType.FLOAT16 assert velocity.compressor is None assert velocity.metadata.units_v1.speed == "m/s" - + # Verify image_inline variable image_inline = next(v for v in dataset.variables if v.name == "image_inline") assert image_inline.long_name == "inline optimized version of 3d_stack" assert isinstance(image_inline.compressor, Blosc) assert image_inline.compressor.algorithm == "zstd" - + # Verify image_headers variable headers = next(v for v in dataset.variables if v.name == "image_headers") assert isinstance(headers.data_type, StructuredType) diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index e6662d70..2dcbc3ec 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -1,22 +1,25 @@ """Unit tests for MDIO v1 
template_factory.""" + +from datetime import datetime +from datetime import timezone + import pytest -from datetime import datetime, timezone -from pydantic import AwareDatetime, ValidationError +from pydantic import ValidationError +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc from mdio.schema.dimension import NamedDimension -from mdio.schema.compressors import Blosc, ZFP -from mdio.schema.dtype import ScalarType, StructuredType -from mdio.schema.v1.units import LengthUnitModel -from mdio.schema.v1.template_factory import ( - make_named_dimension, - make_coordinate, - make_variable, - make_dataset_metadata, - make_dataset, -) +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType +from mdio.schema.v1.template_factory import make_coordinate +from mdio.schema.v1.template_factory import make_dataset +from mdio.schema.v1.template_factory import make_dataset_metadata +from mdio.schema.v1.template_factory import make_named_dimension +from mdio.schema.v1.template_factory import make_variable def test_make_named_dimension(): + """Test that make_named_dimension returns a NamedDimension object.""" dim = make_named_dimension("time", 42) assert isinstance(dim, NamedDimension) assert dim.name == "time" @@ -24,6 +27,7 @@ def test_make_named_dimension(): def test_make_coordinate_minimal(): + """Test that make_coordinate returns a Coordinate object.""" dims = ["x"] coord = make_coordinate(name="x", dimensions=dims, data_type=ScalarType.FLOAT32) assert coord.name == "x" @@ -33,6 +37,7 @@ def test_make_coordinate_minimal(): def test_make_variable_minimal(): + """Test that make_variable returns a Variable object.""" var = make_variable( name="var", dimensions=["x"], @@ -48,7 +53,8 @@ def test_make_variable_minimal(): def test_make_dataset_metadata_minimal(): - ts: AwareDatetime = datetime.now(timezone.utc) + """Test that make_dataset_metadata returns a DatasetMetadata object.""" + ts = datetime.now(timezone.utc) 
meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) assert meta.name == "ds" assert meta.api_version == "1" @@ -57,19 +63,22 @@ def test_make_dataset_metadata_minimal(): def test_make_dataset_minimal(): + """Test that make_dataset returns a Dataset object.""" var = make_variable( name="var", dimensions=["x"], data_type=ScalarType.FLOAT32, compressor=None, ) - ts: AwareDatetime = datetime.now(timezone.utc) + ts = datetime.now(timezone.utc) meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) ds = make_dataset([var], meta) assert ds.variables == [var] - assert ds.metadata == meta + assert ds.metadata == meta + def test_make_toy_dataset(): + """Test that make_toy_dataset returns a Dataset object.""" # Define core dimensions inline = make_named_dimension("inline", 256) crossline = make_named_dimension("crossline", 512) @@ -85,10 +94,10 @@ def test_make_toy_dataset(): "textHeader": [ "C01 .......................... ", "C02 .......................... ", - "C03 .......................... " + "C03 .......................... 
", ], - "foo": "bar" - } + "foo": "bar", + }, ) # Image variable @@ -99,17 +108,20 @@ def test_make_toy_dataset(): compressor=Blosc(algorithm="zstd"), coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], metadata={ - "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [128, 128, 128]}, + }, "statsV1": { "count": 100, "sum": 1215.1, "sumSquares": 125.12, "min": 5.61, "max": 10.84, - "histogram": {"binCenters": [1, 2], "counts": [10, 15]} + "histogram": {"binCenters": [1, 2], "counts": [10, 15]}, }, - "attributes": {"fizz": "buzz"} - } + "attributes": {"fizz": "buzz"}, + }, ) # Velocity variable @@ -120,9 +132,12 @@ def test_make_toy_dataset(): compressor=None, coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], metadata={ - "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128, 128]}}, - "unitsV1": {"speed": "m/s"} - } + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [128, 128, 128]}, + }, + "unitsV1": {"speed": "m/s"}, + }, ) # Inline-optimized image variable @@ -133,92 +148,125 @@ def test_make_toy_dataset(): compressor=ZFP(mode="fixed_accuracy", tolerance=0.05), coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], metadata={ - "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [4, 512, 512]}} - } + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [4, 512, 512]}, + } + }, ) # Headers variable with structured dtype - headers_dtype = StructuredType(fields=[ - {"name": "cdp-x", "format": ScalarType.INT32}, - {"name": "cdp-y", "format": ScalarType.INT32}, - {"name": "elevation", "format": ScalarType.FLOAT16}, - {"name": "some_scalar", "format": ScalarType.FLOAT16} - ]) + headers_dtype = StructuredType( + fields=[ + {"name": "cdp-x", "format": ScalarType.INT32}, + {"name": "cdp-y", "format": ScalarType.INT32}, + {"name": "elevation", "format": 
ScalarType.FLOAT16}, + {"name": "some_scalar", "format": ScalarType.FLOAT16}, + ] + ) image_headers = make_variable( name="image_headers", dimensions=[inline, crossline], data_type=headers_dtype, compressor=None, coordinates=["inline", "crossline", "cdp-x", "cdp-y"], - metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [128, 128]}}} + metadata={ + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [128, 128]}, + } + }, ) # Standalone dimension arrays # Tests that we don't need to pass a compressor. - inline_var = make_variable(name="inline", dimensions=[inline], data_type=ScalarType.UINT32) + inline_var = make_variable( + name="inline", dimensions=[inline], data_type=ScalarType.UINT32 + ) # Tests that we can still pass it explicitly. - crossline_var = make_variable(name="crossline", dimensions=[crossline], data_type=ScalarType.UINT32, compressor=None) + crossline_var = make_variable( + name="crossline", + dimensions=[crossline], + data_type=ScalarType.UINT32, + compressor=None, + ) depth_var = make_variable( name="depth", dimensions=[depth], data_type=ScalarType.UINT32, - metadata={"unitsV1": {"length": "m"}} + metadata={"unitsV1": {"length": "m"}}, ) cdp_x = make_variable( name="cdp-x", dimensions=[inline, crossline], data_type=ScalarType.FLOAT32, - compressor=None, - metadata={"unitsV1": {"length": "m"}} + metadata={"unitsV1": {"length": "m"}}, ) cdp_y = make_variable( name="cdp-y", dimensions=[inline, crossline], data_type=ScalarType.FLOAT32, - compressor=None, - metadata={"unitsV1": {"length": "m"}} + metadata={"unitsV1": {"length": "m"}}, ) # Compose full dataset ds = make_dataset( - variables=[ - image, velocity, image_inline, image_headers, - inline_var, crossline_var, depth_var, cdp_x, cdp_y + [ + image, + velocity, + image_inline, + image_headers, + inline_var, + crossline_var, + depth_var, + cdp_x, + cdp_y, ], - metadata=meta + meta, ) - # Verify basic structure - assert ds.metadata.name == "campos_3d" + assert 
ds.metadata == meta assert len(ds.variables) == 9 - names = [v.name for v in ds.variables] - assert names == [ - "image", "velocity", "image_inline", "image_headers", - "inline", "crossline", "depth", "cdp-x", "cdp-y" - ] + assert ds.variables[0] == image + assert ds.variables[1] == velocity + assert ds.variables[2] == image_inline + assert ds.variables[3] == image_headers + assert ds.variables[4] == inline_var + assert ds.variables[5] == crossline_var + assert ds.variables[6] == depth_var + assert ds.variables[7] == cdp_x + assert ds.variables[8] == cdp_y + def test_named_dimension_invalid_size(): + """Test that make_named_dimension raises a ValidationError for invalid size.""" with pytest.raises(ValidationError): make_named_dimension("dim", 0) with pytest.raises(ValidationError): make_named_dimension("dim", -1) + def test_make_coordinate_invalid_types(): + """Test that make_coordinate raises a ValidationError for invalid types.""" # dimensions must be a list of NamedDimension or str with pytest.raises(ValidationError): - make_coordinate(name="coord", dimensions="notalist", data_type=ScalarType.FLOAT32) + make_coordinate( + name="coord", dimensions="notalist", data_type=ScalarType.FLOAT32 + ) # data_type must be a valid ScalarType with pytest.raises(ValidationError): make_coordinate(name="coord", dimensions=["x"], data_type="notatype") + def test_make_variable_invalid_args(): + """Test that make_variable raises a ValidationError for invalid types.""" # compressor must be Blosc, ZFP or None with pytest.raises(ValidationError): make_variable( name="var", dimensions=["x"], data_type=ScalarType.FLOAT32, - compressor="notacompressor" + compressor="notacompressor", ) # metadata dict must match VariableMetadata schema with pytest.raises(ValidationError): @@ -227,17 +275,20 @@ def test_make_variable_invalid_args(): dimensions=["x"], data_type=ScalarType.FLOAT32, compressor=None, - metadata={"foo": "bar"} + metadata={"foo": "bar"}, ) + def 
test_make_dataset_metadata_invalid_created_on(): + """Test that make_dataset_metadata raises a ValidationError for invalid created_on.""" # created_on must be an aware datetime with pytest.raises(ValidationError): make_dataset_metadata(name="ds", api_version="1", created_on="not-a-date") def test_make_dataset_invalid_variables_and_metadata_types(): - ts: AwareDatetime = datetime.now(timezone.utc) + """Test that make_dataset raises a ValidationError.""" + ts = datetime.now(timezone.utc) meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) var = make_variable( name="var", @@ -250,4 +301,4 @@ def test_make_dataset_invalid_variables_and_metadata_types(): make_dataset(variables="notalist", metadata=meta) # metadata must be a DatasetMetadata instance with pytest.raises(ValidationError): - make_dataset(variables=[var], metadata={"foo": "bar"}) \ No newline at end of file + make_dataset(variables=[var], metadata={"foo": "bar"}) From 0fdd4a87f80c4628c7df192a780cf8d5ff0adffd Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 6 May 2025 15:03:37 +0000 Subject: [PATCH 16/55] Linting --- src/mdio/schemas/v1/template_builder.py | 40 +++++++++++++++++-------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/src/mdio/schemas/v1/template_builder.py b/src/mdio/schemas/v1/template_builder.py index 0f8d4e45..6db6f825 100644 --- a/src/mdio/schemas/v1/template_builder.py +++ b/src/mdio/schemas/v1/template_builder.py @@ -8,9 +8,6 @@ from typing import Dict from typing import List from typing import Optional -from typing import Union - -from pydantic import AwareDatetime from mdio.schema.compressors import ZFP from mdio.schema.compressors import Blosc @@ -19,7 +16,8 @@ from mdio.schema.dtype import StructuredType from mdio.schema.metadata import UserAttributes from mdio.schema.v1.dataset import Dataset -from mdio.schema.v1.dataset import DatasetMetadata + +# from mdio.schema.v1.dataset import DatasetMetadata from mdio.schema.v1.template_factory import 
make_coordinate from mdio.schema.v1.template_factory import make_dataset from mdio.schema.v1.template_factory import make_dataset_metadata @@ -31,6 +29,9 @@ from mdio.schema.v1.variable import VariableMetadata +# from pydantic import AwareDatetime + + class _BuilderState(Enum): """States for the template builder.""" @@ -41,14 +42,22 @@ class _BuilderState(Enum): class TemplateBuilder: - """Builder for creating MDIO datasets with enforced build order: + """Builder for creating MDIO datasets with enforced build order. + + The build order is: 1. Must add dimensions first via add_dimension() 2. Can optionally add coordinates via add_coordinate() 3. Must add variables via add_variable() - 4. Must call build() to create the dataset + 4. Must call build() to create the dataset. """ def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None): + """Initialize the builder. + + Args: + name: Name of the dataset + attributes: Optional attributes for the dataset + """ self.name = name self.api_version = "1.0.0" # TODO: Pull from package metadata self.created_on = datetime.now(timezone.utc) @@ -67,7 +76,9 @@ def add_dimension( data_type: ScalarType | StructuredType = ScalarType.INT32, metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, ) -> "TemplateBuilder": - """Add a dimension. This must be called at least once before adding coordinates or variables. + """Add a dimension. + + This must be called at least once before adding coordinates or variables. 
Args: name: Name of the dimension @@ -75,6 +86,9 @@ def add_dimension( long_name: Optional long name for the dimension variable data_type: Data type for the dimension variable (defaults to INT32) metadata: Optional metadata for the dimension variable + + Returns: + self: Returns self for method chaining """ # Create the dimension dimension = make_named_dimension(name, size) @@ -98,7 +112,7 @@ def add_coordinate( name: str = "", *, long_name: str = None, - dimensions: List[NamedDimension | str] = [], + dimensions: Optional[List[NamedDimension | str]] = None, data_type: ScalarType | StructuredType = ScalarType.FLOAT32, metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, ) -> "TemplateBuilder": @@ -110,7 +124,7 @@ def add_coordinate( if name == "": name = f"coord_{len(self._coordinates)}" - if dimensions == []: + if dimensions is None: dimensions = self._dimensions if isinstance(metadata, dict): metadata = [metadata] @@ -121,7 +135,7 @@ def add_coordinate( if isinstance(dim, str): dim_obj = next((d for d in self._dimensions if d.name == dim), None) if dim_obj is None: - raise ValueError(f"Dimension '{dim}' not found") + raise ValueError(f"Dimension {dim!r} not found") dim_objects.append(dim_obj) else: dim_objects.append(dim) @@ -143,7 +157,7 @@ def add_variable( name: str = "", *, long_name: str = None, - dimensions: List[NamedDimension | str] = [], + dimensions: Optional[List[NamedDimension | str]] = None, data_type: ScalarType | StructuredType = ScalarType.FLOAT32, compressor: Blosc | ZFP | None = None, coordinates: Optional[List[Coordinate | str]] = None, @@ -156,7 +170,7 @@ def add_variable( if name == "": name = f"var_{self._unnamed_variable_counter}" self._unnamed_variable_counter += 1 - if dimensions == []: + if dimensions is None: dimensions = self._dimensions # Convert string dimension names to NamedDimension objects @@ -165,7 +179,7 @@ def add_variable( if isinstance(dim, str): dim_obj = next((d for d in self._dimensions if d.name == 
dim), None) if dim_obj is None: - raise ValueError(f"Dimension '{dim}' not found") + raise ValueError(f"Dimension {dim!r} not found") dim_objects.append(dim_obj) else: dim_objects.append(dim) From de8d54177d9eabf2ca01dca5ea3e210efc2b379b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 6 May 2025 16:31:03 +0000 Subject: [PATCH 17/55] Pull construction logic out of schemas directory --- src/mdio/core/v1/__init__.py | 16 ++ src/mdio/core/v1/builder.py | 224 +++++++++++++++++ src/mdio/core/v1/factory.py | 227 ++++++++++++++++++ src/mdio/schemas/v1/template_builder.py | 20 +- src/mdio/schemas/v1/template_factory.py | 2 +- tests/integration/test_v1_constructor.py | 13 +- tests/unit/schema/v1/test_template_builder.py | 9 +- tests/unit/test_template_factory.py | 14 +- 8 files changed, 499 insertions(+), 26 deletions(-) create mode 100644 src/mdio/core/v1/builder.py create mode 100644 src/mdio/core/v1/factory.py diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index e4be7f39..73ce3ab1 100644 --- a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -4,10 +4,26 @@ """ from ._overloads import mdio +from .builder import Builder from .constructor import write_mdio_metadata +from .factory import ( + AbstractTemplateFactory, + make_coordinate, + make_dataset, + make_dataset_metadata, + make_named_dimension, + make_variable, +) __all__ = [ + "Builder", + "AbstractTemplateFactory", + "make_coordinate", + "make_dataset", + "make_dataset_metadata", + "make_named_dimension", + "make_variable", "mdio", "write_mdio_metadata", ] diff --git a/src/mdio/core/v1/builder.py b/src/mdio/core/v1/builder.py new file mode 100644 index 00000000..e5fad4a1 --- /dev/null +++ b/src/mdio/core/v1/builder.py @@ -0,0 +1,224 @@ +"""Builder pattern implementation for MDIO v1 schema models.""" + +from datetime import datetime +from datetime import timezone +from enum import Enum +from enum import auto +from typing import Any +from typing import Dict +from typing 
import List +from typing import Optional + +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc +from mdio.schema.dimension import NamedDimension +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType +from mdio.schema.metadata import UserAttributes +from mdio.schema.v1.dataset import Dataset + +# from mdio.schema.v1.dataset import DatasetMetadata +from .factory import make_coordinate +from .factory import make_dataset +from .factory import make_dataset_metadata +from .factory import make_named_dimension +from .factory import make_variable +from mdio.schema.v1.units import AllUnits +from mdio.schema.v1.variable import Coordinate +from mdio.schema.v1.variable import Variable +from mdio.schema.v1.variable import VariableMetadata + + +# from pydantic import AwareDatetime + + +class _BuilderState(Enum): + """States for the template builder.""" + + INITIAL = auto() + HAS_DIMENSIONS = auto() + HAS_COORDINATES = auto() + HAS_VARIABLES = auto() + + +class Builder: + """Builder for creating MDIO datasets with enforced build order. + + The build order is: + 1. Must add dimensions first via add_dimension() + 2. Can optionally add coordinates via add_coordinate() + 3. Must add variables via add_variable() + 4. Must call build() to create the dataset. + """ + + def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None): + """Initialize the builder. 
+ + Args: + name: Name of the dataset + attributes: Optional attributes for the dataset + """ + self.name = name + self.api_version = "1.0.0" # TODO: Pull from package metadata + self.created_on = datetime.now(timezone.utc) + self.attributes = attributes + self._dimensions: List[NamedDimension] = [] + self._coordinates: List[Coordinate] = [] + self._variables: List[Variable] = [] + self._state = _BuilderState.INITIAL + self._unnamed_variable_counter = 0 + + def add_dimension( + self, + name: str, + size: int, + long_name: str = None, + data_type: ScalarType | StructuredType = ScalarType.INT32, + metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, + ) -> "Builder": + """Add a dimension. + + This must be called at least once before adding coordinates or variables. + + Args: + name: Name of the dimension + size: Size of the dimension + long_name: Optional long name for the dimension variable + data_type: Data type for the dimension variable (defaults to INT32) + metadata: Optional metadata for the dimension variable + + Returns: + self: Returns self for method chaining + """ + # Create the dimension + dimension = make_named_dimension(name, size) + self._dimensions.append(dimension) + + # Create a variable for the dimension + dim_var = make_variable( + name=name, + long_name=long_name, + dimensions=[dimension], + data_type=data_type, + metadata=metadata, + ) + self._variables.append(dim_var) + + self._state = _BuilderState.HAS_DIMENSIONS + return self + + def add_coordinate( + self, + name: str = "", + *, + long_name: str = None, + dimensions: Optional[List[NamedDimension | str]] = None, + data_type: ScalarType | StructuredType = ScalarType.FLOAT32, + metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, + ) -> "Builder": + """Add a coordinate after adding at least one dimension.""" + if self._state == _BuilderState.INITIAL: + raise ValueError( + "Must add at least one dimension before adding coordinates" + ) + + if name 
== "": + name = f"coord_{len(self._coordinates)}" + if dimensions is None: + dimensions = self._dimensions + if isinstance(metadata, dict): + metadata = [metadata] + + # Convert string dimension names to NamedDimension objects + dim_objects = [] + for dim in dimensions: + if isinstance(dim, str): + dim_obj = next((d for d in self._dimensions if d.name == dim), None) + if dim_obj is None: + raise ValueError(f"Dimension {dim!r} not found") + dim_objects.append(dim_obj) + else: + dim_objects.append(dim) + + self._coordinates.append( + make_coordinate( + name=name, + long_name=long_name, + dimensions=dim_objects, + data_type=data_type, + metadata=metadata, + ) + ) + self._state = _BuilderState.HAS_COORDINATES + return self + + def add_variable( + self, + name: str = "", + *, + long_name: str = None, + dimensions: Optional[List[NamedDimension | str]] = None, + data_type: ScalarType | StructuredType = ScalarType.FLOAT32, + compressor: Blosc | ZFP | None = None, + coordinates: Optional[List[Coordinate | str]] = None, + metadata: Optional[VariableMetadata] = None, + ) -> "Builder": + """Add a variable after adding at least one dimension.""" + if self._state == _BuilderState.INITIAL: + raise ValueError("Must add at least one dimension before adding variables") + + if name == "": + name = f"var_{self._unnamed_variable_counter}" + self._unnamed_variable_counter += 1 + if dimensions is None: + dimensions = self._dimensions + + # Convert string dimension names to NamedDimension objects + dim_objects = [] + for dim in dimensions: + if isinstance(dim, str): + dim_obj = next((d for d in self._dimensions if d.name == dim), None) + if dim_obj is None: + raise ValueError(f"Dimension {dim!r} not found") + dim_objects.append(dim_obj) + else: + dim_objects.append(dim) + + self._variables.append( + make_variable( + name=name, + long_name=long_name, + dimensions=dim_objects, + data_type=data_type, + compressor=compressor, + coordinates=coordinates, + metadata=metadata, + ) + ) + 
self._state = _BuilderState.HAS_VARIABLES + return self + + def build(self) -> Dataset: + """Build the final dataset.""" + if self._state == _BuilderState.INITIAL: + raise ValueError("Must add at least one dimension before building") + + metadata = make_dataset_metadata( + self.name, self.api_version, self.created_on, self.attributes + ) + + # Add coordinates as variables to the dataset + # We make a copy so that coordinates are not duplicated if the builder is reused + all_variables = self._variables.copy() + for coord in self._coordinates: + # Convert coordinate to variable + coord_var = make_variable( + name=coord.name, + long_name=coord.long_name, + dimensions=coord.dimensions, + data_type=coord.data_type, + metadata=coord.metadata, + ) + all_variables.append(coord_var) + + return make_dataset(all_variables, metadata) \ No newline at end of file diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py new file mode 100644 index 00000000..7e0f26f8 --- /dev/null +++ b/src/mdio/core/v1/factory.py @@ -0,0 +1,227 @@ +"""Factory methods for MDIO v1 schema models.""" + +from datetime import datetime +from datetime import timezone +from typing import Any +from typing import Dict +from typing import List +from typing import Optional + +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc +from mdio.schema.dimension import NamedDimension +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType +from mdio.schema.metadata import UserAttributes +from mdio.schema.v1.dataset import Dataset +from mdio.schema.v1.dataset import DatasetMetadata +from mdio.schema.v1.units import AllUnits +from mdio.schema.v1.variable import Coordinate +from mdio.schema.v1.variable import Variable +from mdio.schema.v1.variable import VariableMetadata + + +def make_named_dimension(name: str, size: int) -> NamedDimension: + """Create a NamedDimension with the given name and size.""" + return NamedDimension(name=name, 
size=size) + + +def make_coordinate( + name: str, + dimensions: List[NamedDimension | str], + data_type: ScalarType | StructuredType, + long_name: str = None, + metadata: Optional[List[AllUnits | UserAttributes]] = None, +) -> Coordinate: + """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" + return Coordinate( + name=name, + long_name=long_name, + dimensions=dimensions, + data_type=data_type, + metadata=metadata, + ) + + +def make_variable( # noqa: C901 + name: str, + dimensions: List[NamedDimension | str], + data_type: ScalarType | StructuredType, + long_name: str = None, + compressor: Blosc | ZFP | None = None, + coordinates: Optional[List[Coordinate | str]] = None, + metadata: Optional[ + List[AllUnits | UserAttributes] | Dict[str, Any] | VariableMetadata + ] = None, +) -> Variable: + """Create a Variable with the given parameters. + + Args: + name: Name of the variable + dimensions: List of dimensions + data_type: Data type of the variable + long_name: Optional long name + compressor: Optional compressor + coordinates: Optional list of coordinates + metadata: Optional metadata + + Returns: + Variable: A Variable instance with the specified parameters. + + Raises: + TypeError: If the metadata type is not supported. 
+ """ + # Convert metadata to VariableMetadata if needed + var_metadata = None + if metadata: + if isinstance(metadata, list): + # Convert list of metadata to dict + metadata_dict = {} + for md in metadata: + if isinstance(md, AllUnits): + # For units_v1, if it's a single element, use it directly + if isinstance(md.units_v1, list) and len(md.units_v1) == 1: + metadata_dict["units_v1"] = md.units_v1[0] + else: + metadata_dict["units_v1"] = md.units_v1 + elif isinstance(md, UserAttributes): + # For attributes, if it's a single element, use it directly + attrs = md.model_dump(by_alias=True) + if isinstance(attrs, list) and len(attrs) == 1: + metadata_dict["attributes"] = attrs[0] + else: + metadata_dict["attributes"] = attrs + var_metadata = VariableMetadata(**metadata_dict) + elif isinstance(metadata, dict): + # Convert camelCase keys to snake_case for VariableMetadata + converted_dict = {} + for key, value in metadata.items(): + if key == "unitsV1": + # For units_v1, if it's a single element array, use the element directly + if isinstance(value, list) and len(value) == 1: + converted_dict["units_v1"] = value[0] + else: + converted_dict["units_v1"] = value + else: + converted_dict[key] = value + var_metadata = VariableMetadata(**converted_dict) + elif isinstance(metadata, VariableMetadata): + var_metadata = metadata + else: + raise TypeError(f"Unsupported metadata type: {type(metadata)}") + + # Create the variable with all attributes explicitly set + return Variable( + name=name, + long_name=long_name, + dimensions=dimensions, + data_type=data_type, + compressor=compressor, + coordinates=coordinates, + metadata=var_metadata, + ) + + +def make_dataset_metadata( + name: str, + api_version: str, + created_on: datetime, + attributes: Optional[Dict[str, Any]] = None, +) -> DatasetMetadata: + """Create a DatasetMetadata with name, api_version, created_on, and optional attributes.""" + return DatasetMetadata( + name=name, + api_version=api_version, + created_on=created_on, 
+ attributes=attributes, + ) + + +def make_dataset( + variables: List[Variable], + metadata: DatasetMetadata, +) -> Dataset: + """Create a Dataset with the given variables and metadata.""" + return Dataset( + variables=variables, + metadata=metadata, + ) + + +class AbstractTemplateFactory: + """Abstract factory for creating MDIO datasets.""" + + def __init__(self, name: str): + """Initialize the factory. + + Args: + name: Name of the dataset + """ + self.name = name + self.api_version = "1.0.0" # TODO: Pull from package metadata + self.created_on = datetime.now(timezone.utc) + self.dimensions: List[NamedDimension] = [] + self.coordinates: List[Coordinate] = [] + self.variables: List[Variable] = [] + + def add_dimension(self, name: str, size: int) -> "AbstractTemplateFactory": + """Add a dimension to the factory.""" + self.dimensions.append(make_named_dimension(name, size)) + return self + + def add_coordinate( + self, + name: str = "", + dimensions: Optional[List[NamedDimension | str]] = None, + data_type: ScalarType | StructuredType = ScalarType.FLOAT32, + metadata: Optional[List[AllUnits | UserAttributes]] = None, + ) -> "AbstractTemplateFactory": + """Add a coordinate to the factory.""" + if name == "": + name = f"coord_{len(self.coordinates)}" + if dimensions is None: + dimensions = self.dimensions + self.coordinates.append(make_coordinate(name, dimensions, data_type, metadata)) + return self + + def add_variable( + self, + name: str = "", + dimensions: Optional[List[NamedDimension | str]] = None, + data_type: ScalarType | StructuredType = ScalarType.FLOAT32, + compressor: Blosc | ZFP | None = None, + coordinates: Optional[List[Coordinate | str]] = None, + metadata: Optional[VariableMetadata] = None, + ) -> "AbstractTemplateFactory": + """Add a variable to the factory.""" + if name == "": + name = f"var_{len(self.variables)}" + if dimensions is None: + dimensions = self.dimensions + self.variables.append( + make_variable( + name, dimensions, data_type, 
compressor, coordinates, metadata + ) + ) + return self + + def _compose_metadata(self) -> DatasetMetadata: + """Compose the DatasetMetadata with the given name, api_version, and created_on.""" + return make_dataset_metadata(self.name, self.api_version, self.created_on) + + def _compose_variables(self) -> List[Variable]: + """Compose the Variables with the given parameters.""" + return [ + make_variable( + self.name, + self.dimensions, + self.data_type, + self.compressor, + self.coordinates, + self.metadata, + ) + ] + + def make_dataset(self, variables: List[Variable]) -> Dataset: + """Create a Dataset with the given variables and metadata.""" + return Dataset(variables=variables, metadata=self._compose_metadata()) \ No newline at end of file diff --git a/src/mdio/schemas/v1/template_builder.py b/src/mdio/schemas/v1/template_builder.py index 6db6f825..e5fad4a1 100644 --- a/src/mdio/schemas/v1/template_builder.py +++ b/src/mdio/schemas/v1/template_builder.py @@ -18,11 +18,11 @@ from mdio.schema.v1.dataset import Dataset # from mdio.schema.v1.dataset import DatasetMetadata -from mdio.schema.v1.template_factory import make_coordinate -from mdio.schema.v1.template_factory import make_dataset -from mdio.schema.v1.template_factory import make_dataset_metadata -from mdio.schema.v1.template_factory import make_named_dimension -from mdio.schema.v1.template_factory import make_variable +from .factory import make_coordinate +from .factory import make_dataset +from .factory import make_dataset_metadata +from .factory import make_named_dimension +from .factory import make_variable from mdio.schema.v1.units import AllUnits from mdio.schema.v1.variable import Coordinate from mdio.schema.v1.variable import Variable @@ -41,7 +41,7 @@ class _BuilderState(Enum): HAS_VARIABLES = auto() -class TemplateBuilder: +class Builder: """Builder for creating MDIO datasets with enforced build order. 
The build order is: @@ -75,7 +75,7 @@ def add_dimension( long_name: str = None, data_type: ScalarType | StructuredType = ScalarType.INT32, metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, - ) -> "TemplateBuilder": + ) -> "Builder": """Add a dimension. This must be called at least once before adding coordinates or variables. @@ -115,7 +115,7 @@ def add_coordinate( dimensions: Optional[List[NamedDimension | str]] = None, data_type: ScalarType | StructuredType = ScalarType.FLOAT32, metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, - ) -> "TemplateBuilder": + ) -> "Builder": """Add a coordinate after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: raise ValueError( @@ -162,7 +162,7 @@ def add_variable( compressor: Blosc | ZFP | None = None, coordinates: Optional[List[Coordinate | str]] = None, metadata: Optional[VariableMetadata] = None, - ) -> "TemplateBuilder": + ) -> "Builder": """Add a variable after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: raise ValueError("Must add at least one dimension before adding variables") @@ -221,4 +221,4 @@ def build(self) -> Dataset: ) all_variables.append(coord_var) - return make_dataset(all_variables, metadata) + return make_dataset(all_variables, metadata) \ No newline at end of file diff --git a/src/mdio/schemas/v1/template_factory.py b/src/mdio/schemas/v1/template_factory.py index 79df3558..7e0f26f8 100644 --- a/src/mdio/schemas/v1/template_factory.py +++ b/src/mdio/schemas/v1/template_factory.py @@ -224,4 +224,4 @@ def _compose_variables(self) -> List[Variable]: def make_dataset(self, variables: List[Variable]) -> Dataset: """Create a Dataset with the given variables and metadata.""" - return Dataset(variables=variables, metadata=self._compose_metadata()) + return Dataset(variables=variables, metadata=self._compose_metadata()) \ No newline at end of file diff --git a/tests/integration/test_v1_constructor.py 
b/tests/integration/test_v1_constructor.py index 81b49479..4ff93b32 100644 --- a/tests/integration/test_v1_constructor.py +++ b/tests/integration/test_v1_constructor.py @@ -1,18 +1,21 @@ """Integration test for MDIO v1 Xarray Zarr constructor.""" from datetime import datetime +from datetime import timezone import numpy as np +import pytest -from mdio.core.v1.constructor import write_mdio_metadata +from mdio.core.v1 import mdio from mdio.schema.compressors import ZFP from mdio.schema.compressors import Blosc from mdio.schema.dtype import ScalarType from mdio.schema.dtype import StructuredType -from mdio.schema.v1.template_factory import make_dataset -from mdio.schema.v1.template_factory import make_dataset_metadata -from mdio.schema.v1.template_factory import make_named_dimension -from mdio.schema.v1.template_factory import make_variable +from mdio.core.v1.factory import make_dataset +from mdio.core.v1.factory import make_dataset_metadata +from mdio.core.v1.factory import make_named_dimension +from mdio.core.v1.factory import make_variable +from mdio.core.v1.constructor import write_mdio_metadata def build_toy_dataset(): diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index 4a145922..cd5854d0 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -1,16 +1,17 @@ -"""Unit tests for the MDIO v1 template builder.""" +"""Unit tests for MDIO v1 schema builder.""" from datetime import datetime +from datetime import timezone import pytest -from mdio.core.v1.constructor import write_mdio_metadata from mdio.schema.compressors import Blosc from mdio.schema.dtype import ScalarType from mdio.schema.dtype import StructuredType from mdio.schema.v1.dataset import Dataset -from mdio.schema.v1.template_builder import TemplateBuilder -from mdio.schema.v1.template_builder import _BuilderState +from mdio.core.v1.builder import Builder as TemplateBuilder +from 
mdio.core.v1.builder import _BuilderState +from mdio.core.v1.constructor import write_mdio_metadata def test_builder_initialization(): diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index 2dcbc3ec..8fec87f5 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -1,8 +1,9 @@ -"""Unit tests for MDIO v1 template_factory.""" +"""Unit tests for MDIO v1 factory.""" from datetime import datetime from datetime import timezone +import numpy as np import pytest from pydantic import ValidationError @@ -11,11 +12,12 @@ from mdio.schema.dimension import NamedDimension from mdio.schema.dtype import ScalarType from mdio.schema.dtype import StructuredType -from mdio.schema.v1.template_factory import make_coordinate -from mdio.schema.v1.template_factory import make_dataset -from mdio.schema.v1.template_factory import make_dataset_metadata -from mdio.schema.v1.template_factory import make_named_dimension -from mdio.schema.v1.template_factory import make_variable +from mdio.schema.metadata import UserAttributes +from mdio.core.v1.factory import make_coordinate +from mdio.core.v1.factory import make_dataset +from mdio.core.v1.factory import make_dataset_metadata +from mdio.core.v1.factory import make_named_dimension +from mdio.core.v1.factory import make_variable def test_make_named_dimension(): From 370727919f6bd60f7d248bda1683f276b13376a9 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 6 May 2025 18:22:36 +0000 Subject: [PATCH 18/55] Linting --- src/mdio/core/v1/__init__.py | 14 ++++++-------- src/mdio/core/v1/builder.py | 10 +++++----- src/mdio/core/v1/factory.py | 2 +- tests/integration/test_v1_constructor.py | 13 +++++-------- tests/unit/schema/v1/test_template_builder.py | 7 +++---- tests/unit/test_template_factory.py | 12 +++++------- 6 files changed, 25 insertions(+), 33 deletions(-) diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index 73ce3ab1..89743685 100644 --- 
a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -6,14 +6,12 @@ from ._overloads import mdio from .builder import Builder from .constructor import write_mdio_metadata -from .factory import ( - AbstractTemplateFactory, - make_coordinate, - make_dataset, - make_dataset_metadata, - make_named_dimension, - make_variable, -) +from .factory import AbstractTemplateFactory +from .factory import make_coordinate +from .factory import make_dataset +from .factory import make_dataset_metadata +from .factory import make_named_dimension +from .factory import make_variable __all__ = [ diff --git a/src/mdio/core/v1/builder.py b/src/mdio/core/v1/builder.py index e5fad4a1..563a7c63 100644 --- a/src/mdio/core/v1/builder.py +++ b/src/mdio/core/v1/builder.py @@ -16,6 +16,10 @@ from mdio.schema.dtype import StructuredType from mdio.schema.metadata import UserAttributes from mdio.schema.v1.dataset import Dataset +from mdio.schema.v1.units import AllUnits +from mdio.schema.v1.variable import Coordinate +from mdio.schema.v1.variable import Variable +from mdio.schema.v1.variable import VariableMetadata # from mdio.schema.v1.dataset import DatasetMetadata from .factory import make_coordinate @@ -23,10 +27,6 @@ from .factory import make_dataset_metadata from .factory import make_named_dimension from .factory import make_variable -from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.variable import Coordinate -from mdio.schema.v1.variable import Variable -from mdio.schema.v1.variable import VariableMetadata # from pydantic import AwareDatetime @@ -221,4 +221,4 @@ def build(self) -> Dataset: ) all_variables.append(coord_var) - return make_dataset(all_variables, metadata) \ No newline at end of file + return make_dataset(all_variables, metadata) diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py index 7e0f26f8..79df3558 100644 --- a/src/mdio/core/v1/factory.py +++ b/src/mdio/core/v1/factory.py @@ -224,4 +224,4 @@ def _compose_variables(self) -> 
List[Variable]: def make_dataset(self, variables: List[Variable]) -> Dataset: """Create a Dataset with the given variables and metadata.""" - return Dataset(variables=variables, metadata=self._compose_metadata()) \ No newline at end of file + return Dataset(variables=variables, metadata=self._compose_metadata()) diff --git a/tests/integration/test_v1_constructor.py b/tests/integration/test_v1_constructor.py index 4ff93b32..66573220 100644 --- a/tests/integration/test_v1_constructor.py +++ b/tests/integration/test_v1_constructor.py @@ -1,21 +1,18 @@ """Integration test for MDIO v1 Xarray Zarr constructor.""" from datetime import datetime -from datetime import timezone import numpy as np -import pytest -from mdio.core.v1 import mdio -from mdio.schema.compressors import ZFP -from mdio.schema.compressors import Blosc -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType +from mdio.core.v1.constructor import write_mdio_metadata from mdio.core.v1.factory import make_dataset from mdio.core.v1.factory import make_dataset_metadata from mdio.core.v1.factory import make_named_dimension from mdio.core.v1.factory import make_variable -from mdio.core.v1.constructor import write_mdio_metadata +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType def build_toy_dataset(): diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index cd5854d0..979ef107 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -1,17 +1,16 @@ """Unit tests for MDIO v1 schema builder.""" from datetime import datetime -from datetime import timezone import pytest +from mdio.core.v1.builder import Builder as TemplateBuilder +from mdio.core.v1.builder import _BuilderState +from mdio.core.v1.constructor import write_mdio_metadata from 
mdio.schema.compressors import Blosc from mdio.schema.dtype import ScalarType from mdio.schema.dtype import StructuredType from mdio.schema.v1.dataset import Dataset -from mdio.core.v1.builder import Builder as TemplateBuilder -from mdio.core.v1.builder import _BuilderState -from mdio.core.v1.constructor import write_mdio_metadata def test_builder_initialization(): diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index 8fec87f5..2b1caa3f 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -3,21 +3,19 @@ from datetime import datetime from datetime import timezone -import numpy as np import pytest from pydantic import ValidationError -from mdio.schema.compressors import ZFP -from mdio.schema.compressors import Blosc -from mdio.schema.dimension import NamedDimension -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType -from mdio.schema.metadata import UserAttributes from mdio.core.v1.factory import make_coordinate from mdio.core.v1.factory import make_dataset from mdio.core.v1.factory import make_dataset_metadata from mdio.core.v1.factory import make_named_dimension from mdio.core.v1.factory import make_variable +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc +from mdio.schema.dimension import NamedDimension +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType def test_make_named_dimension(): From cec107d4e67cd4e056f14ddfc91331e3b2cd8a19 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 6 May 2025 19:41:42 +0000 Subject: [PATCH 19/55] Refactor --- src/mdio/core/v1/__init__.py | 14 +- src/mdio/core/v1/_serializer.py | 264 ++++++++++++++++++ src/mdio/core/v1/builder.py | 79 +++++- src/mdio/core/v1/constructor.py | 177 ------------ src/mdio/core/v1/factory.py | 140 +--------- tests/integration/test_v1_constructor.py | 10 +- tests/unit/schema/v1/test_template_builder.py | 2 +- 
tests/unit/test_template_factory.py | 10 +- 8 files changed, 361 insertions(+), 335 deletions(-) create mode 100644 src/mdio/core/v1/_serializer.py delete mode 100644 src/mdio/core/v1/constructor.py diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index 89743685..d353d3e2 100644 --- a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -5,13 +5,15 @@ from ._overloads import mdio from .builder import Builder -from .constructor import write_mdio_metadata +from .builder import write_mdio_metadata +from ._serializer import ( + make_coordinate, + make_dataset, + make_dataset_metadata, + make_named_dimension, + make_variable, +) from .factory import AbstractTemplateFactory -from .factory import make_coordinate -from .factory import make_dataset -from .factory import make_dataset_metadata -from .factory import make_named_dimension -from .factory import make_variable __all__ = [ diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py new file mode 100644 index 00000000..dc859eb6 --- /dev/null +++ b/src/mdio/core/v1/_serializer.py @@ -0,0 +1,264 @@ +"""Internal serialization module for MDIO v1 datasets. + +This module contains internal implementation details for serializing MDIO schema models +to Zarr storage. This API is not considered stable and may change without notice. 
+""" + +from typing import Any, Dict, List, Optional + +import numpy as np +from numcodecs import Blosc as NumcodecsBlosc +from datetime import datetime + +from mdio.core.v1._overloads import mdio +from mdio.schema.compressors import ZFP +from mdio.schema.compressors import Blosc +from mdio.schema.dimension import NamedDimension +from mdio.schema.dtype import ScalarType +from mdio.schema.dtype import StructuredType +from mdio.schema.metadata import UserAttributes +from mdio.schema.v1.dataset import Dataset as MDIODataset +from mdio.schema.v1.dataset import DatasetMetadata +from mdio.schema.v1.units import AllUnits +from mdio.schema.v1.variable import Coordinate +from mdio.schema.v1.variable import Variable +from mdio.schema.v1.variable import VariableMetadata + + +try: + import zfpy as zfpy_base # Base library + from numcodecs import ZFPY # Codec +except ImportError: + print(f"Tried to import zfpy and numcodecs zfpy but failed because {ImportError}") + zfpy_base = None + ZFPY = None + + +def make_named_dimension(name: str, size: int) -> NamedDimension: + """Create a NamedDimension with the given name and size.""" + return NamedDimension(name=name, size=size) + + +def make_coordinate( + name: str, + dimensions: List[NamedDimension | str], + data_type: ScalarType | StructuredType, + long_name: str = None, + metadata: Optional[List[AllUnits | UserAttributes]] = None, +) -> Coordinate: + """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" + return Coordinate( + name=name, + long_name=long_name, + dimensions=dimensions, + data_type=data_type, + metadata=metadata, + ) + + +def make_variable( # noqa: C901 + name: str, + dimensions: List[NamedDimension | str], + data_type: ScalarType | StructuredType, + long_name: str = None, + compressor: Blosc | ZFP | None = None, + coordinates: Optional[List[Coordinate | str]] = None, + metadata: Optional[ + List[AllUnits | UserAttributes] | Dict[str, Any] | VariableMetadata + ] = None, +) -> Variable: + 
"""Create a Variable with the given parameters. + + Args: + name: Name of the variable + dimensions: List of dimensions + data_type: Data type of the variable + long_name: Optional long name + compressor: Optional compressor + coordinates: Optional list of coordinates + metadata: Optional metadata + + Returns: + Variable: A Variable instance with the specified parameters. + + Raises: + TypeError: If the metadata type is not supported. + """ + # Convert metadata to VariableMetadata if needed + var_metadata = None + if metadata: + if isinstance(metadata, list): + # Convert list of metadata to dict + metadata_dict = {} + for md in metadata: + if isinstance(md, AllUnits): + # For units_v1, if it's a single element, use it directly + if isinstance(md.units_v1, list) and len(md.units_v1) == 1: + metadata_dict["units_v1"] = md.units_v1[0] + else: + metadata_dict["units_v1"] = md.units_v1 + elif isinstance(md, UserAttributes): + # For attributes, if it's a single element, use it directly + attrs = md.model_dump(by_alias=True) + if isinstance(attrs, list) and len(attrs) == 1: + metadata_dict["attributes"] = attrs[0] + else: + metadata_dict["attributes"] = attrs + var_metadata = VariableMetadata(**metadata_dict) + elif isinstance(metadata, dict): + # Convert camelCase keys to snake_case for VariableMetadata + converted_dict = {} + for key, value in metadata.items(): + if key == "unitsV1": + # For units_v1, if it's a single element array, use the element directly + if isinstance(value, list) and len(value) == 1: + converted_dict["units_v1"] = value[0] + else: + converted_dict["units_v1"] = value + else: + converted_dict[key] = value + var_metadata = VariableMetadata(**converted_dict) + elif isinstance(metadata, VariableMetadata): + var_metadata = metadata + else: + raise TypeError(f"Unsupported metadata type: {type(metadata)}") + + # Create the variable with all attributes explicitly set + return Variable( + name=name, + long_name=long_name, + dimensions=dimensions, + 
data_type=data_type, + compressor=compressor, + coordinates=coordinates, + metadata=var_metadata, + ) + + +def make_dataset_metadata( + name: str, + api_version: str, + created_on: datetime, + attributes: Optional[Dict[str, Any]] = None, +) -> DatasetMetadata: + """Create a DatasetMetadata with name, api_version, created_on, and optional attributes.""" + return DatasetMetadata( + name=name, + api_version=api_version, + created_on=created_on, + attributes=attributes, + ) + + +def make_dataset( + variables: List[Variable], + metadata: DatasetMetadata, +) -> MDIODataset: + """Create a Dataset with the given variables and metadata.""" + return MDIODataset( + variables=variables, + metadata=metadata, + ) + + +def _convert_compressor( + model: Blosc | ZFP | None, +) -> NumcodecsBlosc | ZFPY | None: + if isinstance(model, Blosc): + return NumcodecsBlosc( + cname=model.algorithm.value, + clevel=model.level, + shuffle=model.shuffle.value, + blocksize=model.blocksize if model.blocksize > 0 else 0, + ) + elif isinstance(model, ZFP): + if zfpy_base is None or ZFPY is None: + raise ImportError("zfpy and numcodecs are required to use ZFP compression") + return ZFPY( + mode=model.mode.value, + tolerance=model.tolerance, + rate=model.rate, + precision=model.precision, + ) + elif model is None: + return None + else: + raise TypeError(f"Unsupported compressor model: {type(model)}") + + +def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: # noqa: C901 + """Build an MDIO dataset with correct dimensions and dtypes. + + This internal function constructs the underlying data structure for an MDIO dataset, + handling dimension mapping, data types, and metadata organization. + + Args: + mdio_ds: The source MDIO dataset to construct from. + + Returns: + The constructed dataset with proper MDIO structure and metadata. + + Raises: + TypeError: If an unsupported data type is encountered. 
+ """ + # Collect dimension sizes + dims: dict[str, int] = {} + for var in mdio_ds.variables: + for d in var.dimensions: + if isinstance(d, NamedDimension): + dims[d.name] = d.size + + # Build data variables + data_vars: dict[str, mdio.DataArray] = {} + for var in mdio_ds.variables: + dim_names = [ + d.name if isinstance(d, NamedDimension) else d for d in var.dimensions + ] + shape = tuple(dims[name] for name in dim_names) + dt = var.data_type + if isinstance(dt, ScalarType): + dtype = np.dtype(dt.value) + elif isinstance(dt, StructuredType): + dtype = np.dtype([(f.name, f.format.value) for f in dt.fields]) + else: + raise TypeError(f"Unsupported data_type: {dt}") + arr = np.zeros(shape, dtype=dtype) + data_array = mdio.DataArray(arr, dims=dim_names) + data_array.encoding["fill_value"] = 0.0 + + # Set long_name if present + if var.long_name is not None: + data_array.attrs["long_name"] = var.long_name + + # Set coordinates if present, excluding dimension names + if var.coordinates is not None: + dim_set = set(dim_names) + coord_names = [ + c.name if isinstance(c, Coordinate) else c + for c in var.coordinates + if (c.name if isinstance(c, Coordinate) else c) not in dim_set + ] + if coord_names: + data_array.attrs["coordinates"] = " ".join(coord_names) + + # Attach variable metadata into DataArray attributes + if var.metadata is not None: + md = var.metadata.model_dump( + by_alias=True, + exclude_none=True, + exclude={"chunk_grid"}, + ) + for key, value in md.items(): + if isinstance(value, list) and len(value) == 1: + md[key] = value[0] + data_array.attrs.update(md) + data_vars[var.name] = data_array + + ds = mdio.Dataset(data_vars) + # Attach dataset metadata + ds.attrs["apiVersion"] = mdio_ds.metadata.api_version + ds.attrs["createdOn"] = str(mdio_ds.metadata.created_on) + ds.attrs["name"] = mdio_ds.metadata.name + if mdio_ds.metadata.attributes: + ds.attrs["attributes"] = mdio_ds.metadata.attributes + return ds diff --git a/src/mdio/core/v1/builder.py 
b/src/mdio/core/v1/builder.py index 563a7c63..6705d229 100644 --- a/src/mdio/core/v1/builder.py +++ b/src/mdio/core/v1/builder.py @@ -9,6 +9,8 @@ from typing import List from typing import Optional +from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding # noqa: F401 + from mdio.schema.compressors import ZFP from mdio.schema.compressors import Blosc from mdio.schema.dimension import NamedDimension @@ -16,21 +18,21 @@ from mdio.schema.dtype import StructuredType from mdio.schema.metadata import UserAttributes from mdio.schema.v1.dataset import Dataset + +# Import factory functions from serializer module +from ._serializer import make_coordinate +from ._serializer import make_dataset +from ._serializer import make_dataset_metadata +from ._serializer import make_named_dimension +from ._serializer import make_variable +from ._serializer import _convert_compressor +from ._serializer import _construct_mdio_dataset +from mdio.core.v1._overloads import mdio from mdio.schema.v1.units import AllUnits from mdio.schema.v1.variable import Coordinate from mdio.schema.v1.variable import Variable from mdio.schema.v1.variable import VariableMetadata -# from mdio.schema.v1.dataset import DatasetMetadata -from .factory import make_coordinate -from .factory import make_dataset -from .factory import make_dataset_metadata -from .factory import make_named_dimension -from .factory import make_variable - - -# from pydantic import AwareDatetime - class _BuilderState(Enum): """States for the template builder.""" @@ -222,3 +224,60 @@ def build(self) -> Dataset: all_variables.append(coord_var) return make_dataset(all_variables, metadata) + + +def write_mdio_metadata( + mdio_ds: Dataset, store: str, **kwargs: Any +) -> mdio.Dataset: + """Write MDIO metadata to a Zarr store and return the constructed mdio.Dataset. + + This function constructs an mdio.Dataset from the MDIO dataset and writes its metadata + to a Zarr store. 
The actual data is not written, only the metadata structure is created. + + Args: + mdio_ds: The MDIO dataset to serialize + store: Path to the Zarr store + **kwargs: Additional arguments to pass to to_zarr() + + Returns: + The constructed xarray Dataset with MDIO extensions + """ + ds = _construct_mdio_dataset(mdio_ds) + + def _generate_encodings() -> dict: + """Generate encodings for each variable in the MDIO dataset. + + Returns: + Dictionary mapping variable names to their encoding configurations. + """ + # TODO: Re-enable chunk_key_encoding when supported by xarray + # dimension_separator_encoding = V2ChunkKeyEncoding(separator="/").to_dict() + global_encodings = {} + for var in mdio_ds.variables: + fill_value = 0 + if isinstance(var.data_type, StructuredType): + continue + chunks = None + if var.metadata is not None and var.metadata.chunk_grid is not None: + chunks = var.metadata.chunk_grid.configuration.chunk_shape + global_encodings[var.name] = { + "chunks": chunks, + # TODO: Re-enable chunk_key_encoding when supported by xarray + # "chunk_key_encoding": dimension_separator_encoding, + "_FillValue": fill_value, + "dtype": var.data_type, + "compressors": _convert_compressor(var.compressor), + } + return global_encodings + + ds.to_mdio( + store, + mode="w", + zarr_format=2, + consolidated=True, + safe_chunks=False, + compute=False, + encoding=_generate_encodings(), + **kwargs, + ) + return ds diff --git a/src/mdio/core/v1/constructor.py b/src/mdio/core/v1/constructor.py deleted file mode 100644 index 76af2b35..00000000 --- a/src/mdio/core/v1/constructor.py +++ /dev/null @@ -1,177 +0,0 @@ -"""Construct an MDIO dataset and write to Zarr.""" - -from typing import Any - -import numpy as np -from numcodecs import Blosc as NumcodecsBlosc -from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding # noqa: F401 - -from mdio.core.v1._overloads import mdio -from mdio.schema.compressors import ZFP -from mdio.schema.compressors import Blosc -from mdio.schema.dimension 
import NamedDimension -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType -from mdio.schema.v1.dataset import Dataset as MDIODataset -from mdio.schema.v1.variable import Coordinate - - -try: - import zfpy as zfpy_base # Base library - from numcodecs import ZFPY # Codec -except ImportError: - print(f"Tried to import zfpy and numcodecs zfpy but failed because {ImportError}") - zfpy_base = None - ZFPY = None - - -def _convert_compressor( - model: Blosc | ZFP | None, -) -> NumcodecsBlosc | ZFPY | None: - if isinstance(model, Blosc): - return NumcodecsBlosc( - cname=model.algorithm.value, - clevel=model.level, - shuffle=model.shuffle.value, - blocksize=model.blocksize if model.blocksize > 0 else 0, - ) - elif isinstance(model, ZFP): - if zfpy_base is None or ZFPY is None: - raise ImportError("zfpy and numcodecs are required to use ZFP compression") - return ZFPY( - mode=model.mode.value, - tolerance=model.tolerance, - rate=model.rate, - precision=model.precision, - ) - elif model is None: - return None - else: - raise TypeError(f"Unsupported compressor model: {type(model)}") - - -def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: # noqa: C901 - """Build an MDIO dataset with correct dimensions and dtypes. - - This internal function constructs the underlying data structure for an MDIO dataset, - handling dimension mapping, data types, and metadata organization. - - Args: - mdio_ds: The source MDIO dataset to construct from. - - Returns: - The constructed dataset with proper MDIO structure and metadata. - - Raises: - TypeError: If an unsupported data type is encountered. 
- """ - # Collect dimension sizes - dims: dict[str, int] = {} - for var in mdio_ds.variables: - for d in var.dimensions: - if isinstance(d, NamedDimension): - dims[d.name] = d.size - - # Build data variables - data_vars: dict[str, mdio.DataArray] = {} - for var in mdio_ds.variables: - dim_names = [ - d.name if isinstance(d, NamedDimension) else d for d in var.dimensions - ] - shape = tuple(dims[name] for name in dim_names) - dt = var.data_type - if isinstance(dt, ScalarType): - dtype = np.dtype(dt.value) - elif isinstance(dt, StructuredType): - dtype = np.dtype([(f.name, f.format.value) for f in dt.fields]) - else: - raise TypeError(f"Unsupported data_type: {dt}") - arr = np.zeros(shape, dtype=dtype) - data_array = mdio.DataArray(arr, dims=dim_names) - data_array.encoding["fill_value"] = 0.0 - - # Set long_name if present - if var.long_name is not None: - data_array.attrs["long_name"] = var.long_name - - # Set coordinates if present, excluding dimension names - if var.coordinates is not None: - dim_set = set(dim_names) - coord_names = [ - c.name if isinstance(c, Coordinate) else c - for c in var.coordinates - if (c.name if isinstance(c, Coordinate) else c) not in dim_set - ] - if coord_names: - data_array.attrs["coordinates"] = " ".join(coord_names) - - # Attach variable metadata into DataArray attributes - if var.metadata is not None: - md = var.metadata.model_dump( - by_alias=True, - exclude_none=True, - exclude={"chunk_grid"}, - ) - for key, value in md.items(): - if isinstance(value, list) and len(value) == 1: - md[key] = value[0] - data_array.attrs.update(md) - data_vars[var.name] = data_array - - ds = mdio.Dataset(data_vars) - # Attach dataset metadata - ds.attrs["apiVersion"] = mdio_ds.metadata.api_version - ds.attrs["createdOn"] = str(mdio_ds.metadata.created_on) - ds.attrs["name"] = mdio_ds.metadata.name - if mdio_ds.metadata.attributes: - ds.attrs["attributes"] = mdio_ds.metadata.attributes - return ds - - -def write_mdio_metadata( - mdio_ds: MDIODataset, 
store: str, **kwargs: Any -) -> mdio.Dataset: - """Write MDIO metadata to a Zarr store and return the constructed mdio.Dataset. - - This function constructs an mdio.Dataset from the MDIO dataset and writes its metadata - to a Zarr store. The actual data is not written, only the metadata structure is created. - """ - ds = _construct_mdio_dataset(mdio_ds) - - def _generate_encodings() -> dict: - """Generate encodings for each variable in the MDIO dataset. - - Returns: - Dictionary mapping variable names to their encoding configurations. - """ - # TODO: Re-enable chunk_key_encoding when supported by xarray - # dimension_separator_encoding = V2ChunkKeyEncoding(separator="/").to_dict() - global_encodings = {} - for var in mdio_ds.variables: - fill_value = 0 - if isinstance(var.data_type, StructuredType): - continue - chunks = None - if var.metadata is not None and var.metadata.chunk_grid is not None: - chunks = var.metadata.chunk_grid.configuration.chunk_shape - global_encodings[var.name] = { - "chunks": chunks, - # TODO: Re-enable chunk_key_encoding when supported by xarray - # "chunk_key_encoding": dimension_separator_encoding, - "_FillValue": fill_value, - "dtype": var.data_type, - "compressors": _convert_compressor(var.compressor), - } - return global_encodings - - ds.to_mdio( - store, - mode="w", - zarr_format=2, - consolidated=True, - safe_chunks=False, - compute=False, - encoding=_generate_encodings(), - **kwargs, - ) - return ds diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py index 79df3558..0dc4bf3f 100644 --- a/src/mdio/core/v1/factory.py +++ b/src/mdio/core/v1/factory.py @@ -1,9 +1,7 @@ -"""Factory methods for MDIO v1 schema models.""" +"""Factory implementation for MDIO v1 datasets.""" from datetime import datetime from datetime import timezone -from typing import Any -from typing import Dict from typing import List from typing import Optional @@ -14,138 +12,18 @@ from mdio.schema.dtype import StructuredType from mdio.schema.metadata 
import UserAttributes from mdio.schema.v1.dataset import Dataset -from mdio.schema.v1.dataset import DatasetMetadata from mdio.schema.v1.units import AllUnits from mdio.schema.v1.variable import Coordinate from mdio.schema.v1.variable import Variable from mdio.schema.v1.variable import VariableMetadata - -def make_named_dimension(name: str, size: int) -> NamedDimension: - """Create a NamedDimension with the given name and size.""" - return NamedDimension(name=name, size=size) - - -def make_coordinate( - name: str, - dimensions: List[NamedDimension | str], - data_type: ScalarType | StructuredType, - long_name: str = None, - metadata: Optional[List[AllUnits | UserAttributes]] = None, -) -> Coordinate: - """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" - return Coordinate( - name=name, - long_name=long_name, - dimensions=dimensions, - data_type=data_type, - metadata=metadata, - ) - - -def make_variable( # noqa: C901 - name: str, - dimensions: List[NamedDimension | str], - data_type: ScalarType | StructuredType, - long_name: str = None, - compressor: Blosc | ZFP | None = None, - coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[ - List[AllUnits | UserAttributes] | Dict[str, Any] | VariableMetadata - ] = None, -) -> Variable: - """Create a Variable with the given parameters. - - Args: - name: Name of the variable - dimensions: List of dimensions - data_type: Data type of the variable - long_name: Optional long name - compressor: Optional compressor - coordinates: Optional list of coordinates - metadata: Optional metadata - - Returns: - Variable: A Variable instance with the specified parameters. - - Raises: - TypeError: If the metadata type is not supported. 
- """ - # Convert metadata to VariableMetadata if needed - var_metadata = None - if metadata: - if isinstance(metadata, list): - # Convert list of metadata to dict - metadata_dict = {} - for md in metadata: - if isinstance(md, AllUnits): - # For units_v1, if it's a single element, use it directly - if isinstance(md.units_v1, list) and len(md.units_v1) == 1: - metadata_dict["units_v1"] = md.units_v1[0] - else: - metadata_dict["units_v1"] = md.units_v1 - elif isinstance(md, UserAttributes): - # For attributes, if it's a single element, use it directly - attrs = md.model_dump(by_alias=True) - if isinstance(attrs, list) and len(attrs) == 1: - metadata_dict["attributes"] = attrs[0] - else: - metadata_dict["attributes"] = attrs - var_metadata = VariableMetadata(**metadata_dict) - elif isinstance(metadata, dict): - # Convert camelCase keys to snake_case for VariableMetadata - converted_dict = {} - for key, value in metadata.items(): - if key == "unitsV1": - # For units_v1, if it's a single element array, use the element directly - if isinstance(value, list) and len(value) == 1: - converted_dict["units_v1"] = value[0] - else: - converted_dict["units_v1"] = value - else: - converted_dict[key] = value - var_metadata = VariableMetadata(**converted_dict) - elif isinstance(metadata, VariableMetadata): - var_metadata = metadata - else: - raise TypeError(f"Unsupported metadata type: {type(metadata)}") - - # Create the variable with all attributes explicitly set - return Variable( - name=name, - long_name=long_name, - dimensions=dimensions, - data_type=data_type, - compressor=compressor, - coordinates=coordinates, - metadata=var_metadata, - ) - - -def make_dataset_metadata( - name: str, - api_version: str, - created_on: datetime, - attributes: Optional[Dict[str, Any]] = None, -) -> DatasetMetadata: - """Create a DatasetMetadata with name, api_version, created_on, and optional attributes.""" - return DatasetMetadata( - name=name, - api_version=api_version, - created_on=created_on, 
- attributes=attributes, - ) - - -def make_dataset( - variables: List[Variable], - metadata: DatasetMetadata, -) -> Dataset: - """Create a Dataset with the given variables and metadata.""" - return Dataset( - variables=variables, - metadata=metadata, - ) +from ._serializer import ( + make_coordinate, + make_dataset, + make_dataset_metadata, + make_named_dimension, + make_variable, +) class AbstractTemplateFactory: @@ -205,7 +83,7 @@ def add_variable( ) return self - def _compose_metadata(self) -> DatasetMetadata: + def _compose_metadata(self): """Compose the DatasetMetadata with the given name, api_version, and created_on.""" return make_dataset_metadata(self.name, self.api_version, self.created_on) diff --git a/tests/integration/test_v1_constructor.py b/tests/integration/test_v1_constructor.py index 66573220..3921d785 100644 --- a/tests/integration/test_v1_constructor.py +++ b/tests/integration/test_v1_constructor.py @@ -4,11 +4,11 @@ import numpy as np -from mdio.core.v1.constructor import write_mdio_metadata -from mdio.core.v1.factory import make_dataset -from mdio.core.v1.factory import make_dataset_metadata -from mdio.core.v1.factory import make_named_dimension -from mdio.core.v1.factory import make_variable +from mdio.core.v1.builder import write_mdio_metadata +from mdio.core.v1._serializer import make_dataset +from mdio.core.v1._serializer import make_dataset_metadata +from mdio.core.v1._serializer import make_named_dimension +from mdio.core.v1._serializer import make_variable from mdio.schema.compressors import ZFP from mdio.schema.compressors import Blosc from mdio.schema.dtype import ScalarType diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index 979ef107..f6c64a81 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -6,7 +6,7 @@ from mdio.core.v1.builder import Builder as TemplateBuilder from mdio.core.v1.builder import _BuilderState 
-from mdio.core.v1.constructor import write_mdio_metadata +from mdio.core.v1.builder import write_mdio_metadata from mdio.schema.compressors import Blosc from mdio.schema.dtype import ScalarType from mdio.schema.dtype import StructuredType diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index 2b1caa3f..87608b02 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -6,11 +6,11 @@ import pytest from pydantic import ValidationError -from mdio.core.v1.factory import make_coordinate -from mdio.core.v1.factory import make_dataset -from mdio.core.v1.factory import make_dataset_metadata -from mdio.core.v1.factory import make_named_dimension -from mdio.core.v1.factory import make_variable +from mdio.core.v1._serializer import make_coordinate +from mdio.core.v1._serializer import make_dataset +from mdio.core.v1._serializer import make_dataset_metadata +from mdio.core.v1._serializer import make_named_dimension +from mdio.core.v1._serializer import make_variable from mdio.schema.compressors import ZFP from mdio.schema.compressors import Blosc from mdio.schema.dimension import NamedDimension From a67ccc216badd5940cbdb261aa8ba69e265c2740 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 6 May 2025 19:52:00 +0000 Subject: [PATCH 20/55] Rename test --- .../{test_v1_constructor.py => test_v1_serialization.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/integration/{test_v1_constructor.py => test_v1_serialization.py} (100%) diff --git a/tests/integration/test_v1_constructor.py b/tests/integration/test_v1_serialization.py similarity index 100% rename from tests/integration/test_v1_constructor.py rename to tests/integration/test_v1_serialization.py From f58e63a8828556b301c8c2a78b684238181a01eb Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 6 May 2025 19:52:22 +0000 Subject: [PATCH 21/55] Rename builder class --- src/mdio/core/v1/__init__.py | 4 ++-- 
src/mdio/core/v1/builder.py | 11 +++++----- tests/unit/schema/v1/test_template_builder.py | 22 +++++++++---------- tests/unit/test_template_factory.py | 2 ++ 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index d353d3e2..5f7513dc 100644 --- a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -4,7 +4,7 @@ """ from ._overloads import mdio -from .builder import Builder +from .builder import MDIODatasetBuilder from .builder import write_mdio_metadata from ._serializer import ( make_coordinate, @@ -17,7 +17,7 @@ __all__ = [ - "Builder", + "MDIODatasetBuilder", "AbstractTemplateFactory", "make_coordinate", "make_dataset", diff --git a/src/mdio/core/v1/builder.py b/src/mdio/core/v1/builder.py index 6705d229..03651730 100644 --- a/src/mdio/core/v1/builder.py +++ b/src/mdio/core/v1/builder.py @@ -43,10 +43,11 @@ class _BuilderState(Enum): HAS_VARIABLES = auto() -class Builder: +class MDIODatasetBuilder: """Builder for creating MDIO datasets with enforced build order. - The build order is: + This builder implements the builder pattern to create MDIO datasets with a v1 schema. + It enforces a specific build order to ensure valid dataset construction: 1. Must add dimensions first via add_dimension() 2. Can optionally add coordinates via add_coordinate() 3. Must add variables via add_variable() @@ -77,7 +78,7 @@ def add_dimension( long_name: str = None, data_type: ScalarType | StructuredType = ScalarType.INT32, metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, - ) -> "Builder": + ) -> "MDIODatasetBuilder": """Add a dimension. This must be called at least once before adding coordinates or variables. 
@@ -117,7 +118,7 @@ def add_coordinate( dimensions: Optional[List[NamedDimension | str]] = None, data_type: ScalarType | StructuredType = ScalarType.FLOAT32, metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, - ) -> "Builder": + ) -> "MDIODatasetBuilder": """Add a coordinate after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: raise ValueError( @@ -164,7 +165,7 @@ def add_variable( compressor: Blosc | ZFP | None = None, coordinates: Optional[List[Coordinate | str]] = None, metadata: Optional[VariableMetadata] = None, - ) -> "Builder": + ) -> "MDIODatasetBuilder": """Add a variable after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: raise ValueError("Must add at least one dimension before adding variables") diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index f6c64a81..a1df28ac 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -4,7 +4,7 @@ import pytest -from mdio.core.v1.builder import Builder as TemplateBuilder +from mdio.core.v1.builder import MDIODatasetBuilder from mdio.core.v1.builder import _BuilderState from mdio.core.v1.builder import write_mdio_metadata from mdio.schema.compressors import Blosc @@ -15,7 +15,7 @@ def test_builder_initialization(): """Test basic builder initialization.""" - builder = TemplateBuilder("test_dataset") + builder = MDIODatasetBuilder("test_dataset") assert builder.name == "test_dataset" assert builder.api_version == "1.0.0" assert isinstance(builder.created_on, datetime) @@ -27,7 +27,7 @@ def test_builder_initialization(): def test_dimension_builder_state(): """Test dimension builder state transitions and functionality.""" - builder = TemplateBuilder("test_dataset") + builder = MDIODatasetBuilder("test_dataset") # First dimension should change state to HAS_DIMENSIONS and create a variable builder = builder.add_dimension("x", 
100, long_name="X Dimension") @@ -55,7 +55,7 @@ def test_dimension_builder_state(): def test_dimension_with_metadata(): """Test adding dimensions with custom metadata.""" - builder = TemplateBuilder("test_dataset") + builder = MDIODatasetBuilder("test_dataset") # Add dimension with custom metadata builder = builder.add_dimension( @@ -74,7 +74,7 @@ def test_dimension_with_metadata(): def test_coordinate_builder_state(): """Test coordinate builder state transitions and functionality.""" - builder = TemplateBuilder("test_dataset") + builder = MDIODatasetBuilder("test_dataset") # Should not be able to add coordinates before dimensions with pytest.raises( @@ -106,7 +106,7 @@ def test_coordinate_builder_state(): def test_variable_builder_state(): """Test variable builder state transitions and functionality.""" - builder = TemplateBuilder("test_dataset") + builder = MDIODatasetBuilder("test_dataset") # Should not be able to add variables before dimensions with pytest.raises( @@ -136,7 +136,7 @@ def test_variable_builder_state(): def test_build_dataset(): """Test building a complete dataset.""" dataset = ( - TemplateBuilder("test_dataset") + MDIODatasetBuilder("test_dataset") .add_dimension("x", 100) .add_dimension("y", 200) .add_coordinate("x_coord", dimensions=["x"]) @@ -159,7 +159,7 @@ def test_build_dataset(): def test_auto_naming(): """Test automatic naming of coordinates and variables.""" dataset = ( - TemplateBuilder("test_dataset") + MDIODatasetBuilder("test_dataset") .add_dimension("x", 100) .add_coordinate() # Should be named "coord_0" .add_coordinate() # Should be named "coord_1" @@ -176,7 +176,7 @@ def test_auto_naming(): def test_default_dimensions(): """Test that coordinates and variables use all dimensions by default.""" dataset = ( - TemplateBuilder("test_dataset") + MDIODatasetBuilder("test_dataset") .add_dimension("x", 100) .add_dimension("y", 200) .add_coordinate() # Should use both x and y dimensions @@ -194,7 +194,7 @@ def test_default_dimensions(): 
def test_build_order_enforcement(): """Test that the builder enforces the correct build order.""" - builder = TemplateBuilder("test_dataset") + builder = MDIODatasetBuilder("test_dataset") # Should not be able to add coordinates before dimensions with pytest.raises( @@ -218,7 +218,7 @@ def test_build_order_enforcement(): def test_toy_example(tmp_path): """Test building a toy dataset with multiple variables and attributes.""" dataset = ( - TemplateBuilder( + MDIODatasetBuilder( "campos_3d", attributes={ "textHeader": [ diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index 87608b02..c091aeba 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -1,5 +1,7 @@ """Unit tests for MDIO v1 factory.""" +# TODO(BrianMichell): Update this to use canonical factory functions. + from datetime import datetime from datetime import timezone From e53ffa97db20621ed6e477dc957bd5fd6ec0dd8d Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 6 May 2025 21:11:57 +0000 Subject: [PATCH 22/55] Begin canonical dataset factory creation. 
--- src/mdio/core/v1/__init__.py | 7 +- src/mdio/core/v1/factory.py | 299 ++++++++++++++++++------- tests/unit/test_template_factory.py | 336 +++++++++++----------------- 3 files changed, 353 insertions(+), 289 deletions(-) diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index 5f7513dc..62ff4b60 100644 --- a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -13,12 +13,11 @@ make_named_dimension, make_variable, ) -from .factory import AbstractTemplateFactory - +from .factory import MDIOSchemaType +from .factory import SCHEMA_TEMPLATE_MAP __all__ = [ "MDIODatasetBuilder", - "AbstractTemplateFactory", "make_coordinate", "make_dataset", "make_dataset_metadata", @@ -26,4 +25,6 @@ "make_variable", "mdio", "write_mdio_metadata", + "MDIOSchemaType", + "SCHEMA_TEMPLATE_MAP", ] diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py index 0dc4bf3f..67085072 100644 --- a/src/mdio/core/v1/factory.py +++ b/src/mdio/core/v1/factory.py @@ -1,105 +1,238 @@ -"""Factory implementation for MDIO v1 datasets.""" +"""MDIO factories for seismic data.""" +from __future__ import annotations + +import importlib +from datetime import UTC from datetime import datetime -from datetime import timezone +from enum import Enum +from enum import auto +from typing import Any +from typing import Dict from typing import List from typing import Optional -from mdio.schema.compressors import ZFP +from mdio.core.v1.builder import MDIODatasetBuilder from mdio.schema.compressors import Blosc -from mdio.schema.dimension import NamedDimension from mdio.schema.dtype import ScalarType from mdio.schema.dtype import StructuredType -from mdio.schema.metadata import UserAttributes from mdio.schema.v1.dataset import Dataset from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.variable import Coordinate -from mdio.schema.v1.variable import Variable -from mdio.schema.v1.variable import VariableMetadata +from mdio.schema.v1.units import LengthUnitModel + 
+ +class MDIOSchemaType(Enum): + """MDIO templates for specific data types.""" -from ._serializer import ( - make_coordinate, - make_dataset, - make_dataset_metadata, - make_named_dimension, - make_variable, -) + SEISMIC_3D_POST_STACK_GENERIC = auto() + SEISMIC_3D_POST_STACK_TIME = auto() + SEISMIC_3D_POST_STACK_DEPTH = auto() + SEISMIC_3D_PRE_STACK_CDP_TIME = auto() + SEISMIC_3D_PRE_STACK_CDP_DEPTH = auto() -class AbstractTemplateFactory: - """Abstract factory for creating MDIO datasets.""" +class Seismic3DPostStackGeneric: + """Generic 3D seismic post stack dataset.""" - def __init__(self, name: str): - """Initialize the factory. + def __init__(self): + """Initialize generic post stack dataset.""" + self._dim_names = ["inline", "crossline", "sample"] + self._chunks = [128, 128, 128] # 8 mb + self._coords = { + "cdp-x": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]), + "cdp-y": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]), + } + + def create( + self, + name: str, + shape: List[int], + header_fields: Dict[str, str], + create_coords: bool = False, + sample_format: Optional[str] = None, + chunks: Optional[List[int]] = None, + sample_units: Optional[Dict[str, str]] = None, + z_units: Optional[Dict[str, str]] = None, + attributes: Optional[Dict[str, Any]] = None, + ) -> Dataset: + """Create a generic seismic dataset schema. 
Args: name: Name of the dataset + shape: Shape of the dataset + header_fields: Header fields to include as a dict of field_name: dtype + create_coords: Whether to create coordinates + sample_format: Format of the samples + chunks: Chunk sizes + sample_units: Units for samples + z_units: Units for z-axis + attributes: Additional attributes to include in the dataset metadata + + Returns: + Dataset: The created dataset + """ + chunks = chunks or self._chunks + sample_format = sample_format or "float32" + + builder = MDIODatasetBuilder( + name=name, + attributes=attributes, + ) + + # Add dimensions + for dim_name, dim_size in zip(self._dim_names, shape): + builder.add_dimension( + name=dim_name, + size=dim_size, + data_type=ScalarType.UINT32, + metadata=z_units if dim_name == "sample" else None, + ) + + # Add coordinates if requested + if create_coords: + for coord_name, (format_, unit, coord_dims) in self._coords.items(): + builder.add_coordinate( + name=coord_name, + data_type=ScalarType(format_), + dimensions=coord_dims, + metadata=unit, + ) + + # Add seismic variable + builder.add_variable( + name="seismic", + data_type=ScalarType(sample_format), + dimensions=self._dim_names, + compressor=Blosc(name="blosc", algorithm="zstd"), + metadata=sample_units, + ) + + # Add header variable with structured dtype + header_dtype = StructuredType(fields=[ + {"name": field_name, "format": field_type} + for field_name, field_type in header_fields.items() + ]) + builder.add_variable( + name="headers", + data_type=header_dtype, + dimensions=self._dim_names[:-1], + compressor=Blosc(name="blosc"), + ) + + # Add trace mask + builder.add_variable( + name="trace_mask", + data_type=ScalarType.BOOL, + dimensions=self._dim_names[:-1], + compressor=Blosc(name="blosc"), + ) + + return builder.build() + + +class Seismic3DPostStack(Seismic3DPostStackGeneric): + """3D seismic post stack dataset with domain-specific attributes.""" + + def __init__(self, domain: str): + """Initialize post stack 
dataset. + + Args: + domain: Domain of the dataset (time/depth) """ - self.name = name - self.api_version = "1.0.0" # TODO: Pull from package metadata - self.created_on = datetime.now(timezone.utc) - self.dimensions: List[NamedDimension] = [] - self.coordinates: List[Coordinate] = [] - self.variables: List[Variable] = [] - - def add_dimension(self, name: str, size: int) -> "AbstractTemplateFactory": - """Add a dimension to the factory.""" - self.dimensions.append(make_named_dimension(name, size)) - return self - - def add_coordinate( + super().__init__() + self._dim_names = ["inline", "crossline", domain] + + def create( self, - name: str = "", - dimensions: Optional[List[NamedDimension | str]] = None, - data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - metadata: Optional[List[AllUnits | UserAttributes]] = None, - ) -> "AbstractTemplateFactory": - """Add a coordinate to the factory.""" - if name == "": - name = f"coord_{len(self.coordinates)}" - if dimensions is None: - dimensions = self.dimensions - self.coordinates.append(make_coordinate(name, dimensions, data_type, metadata)) - return self - - def add_variable( + name: str, + shape: List[int], + header_fields: Dict[str, str], + create_coords: bool = False, + sample_format: Optional[str] = None, + chunks: Optional[List[int]] = None, + sample_units: Optional[Dict[str, str]] = None, + z_units: Optional[Dict[str, str]] = None, + attributes: Optional[Dict[str, Any]] = None, + ) -> Dataset: + """Create a seismic dataset schema with domain-specific attributes.""" + # Add seismic-specific attributes + seismic_attrs = { + "surveyDimensionality": "3D", + "ensembleType": "line", + "processingStage": "post-stack", + } + if attributes: + seismic_attrs.update(attributes) + + return super().create( + name=name, + shape=shape, + header_fields=header_fields, + create_coords=create_coords, + sample_format=sample_format, + chunks=chunks, + sample_units=sample_units, + z_units=z_units, + attributes=seismic_attrs, + ) + 
+ +class Seismic3DPreStack(Seismic3DPostStackGeneric): + """3D seismic pre stack dataset.""" + + def __init__(self, domain: str): + """Initialize pre stack dataset. + + Args: + domain: Domain of the dataset (time/depth) + """ + super().__init__() + self._dim_names = ["inline", "crossline", "offset", domain] + self._chunks = [1, 1, 512, 4096] # 8 mb + self._coords = { + "cdp-x": ("float32", {"length": "m"}, self._dim_names[:-2]), + "cdp-y": ("float32", {"length": "m"}, self._dim_names[:-2]), + } + + def create( self, - name: str = "", - dimensions: Optional[List[NamedDimension | str]] = None, - data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - compressor: Blosc | ZFP | None = None, - coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[VariableMetadata] = None, - ) -> "AbstractTemplateFactory": - """Add a variable to the factory.""" - if name == "": - name = f"var_{len(self.variables)}" - if dimensions is None: - dimensions = self.dimensions - self.variables.append( - make_variable( - name, dimensions, data_type, compressor, coordinates, metadata - ) + name: str, + shape: List[int], + header_fields: Dict[str, str], + create_coords: bool = False, + sample_format: Optional[str] = None, + chunks: Optional[List[int]] = None, + sample_units: Optional[Dict[str, str]] = None, + z_units: Optional[Dict[str, str]] = None, + attributes: Optional[Dict[str, Any]] = None, + ) -> Dataset: + """Create a seismic dataset schema with pre-stack attributes.""" + # Add seismic-specific attributes + seismic_attrs = { + "surveyDimensionality": "3D", + "ensembleType": "cdp", + "processingStage": "pre-stack", + } + if attributes: + seismic_attrs.update(attributes) + + return super().create( + name=name, + shape=shape, + header_fields=header_fields, + create_coords=create_coords, + sample_format=sample_format, + chunks=chunks, + sample_units=sample_units, + z_units=z_units, + attributes=seismic_attrs, ) - return self - - def _compose_metadata(self): - 
"""Compose the DatasetMetadata with the given name, api_version, and created_on.""" - return make_dataset_metadata(self.name, self.api_version, self.created_on) - - def _compose_variables(self) -> List[Variable]: - """Compose the Variables with the given parameters.""" - return [ - make_variable( - self.name, - self.dimensions, - self.data_type, - self.compressor, - self.coordinates, - self.metadata, - ) - ] - def make_dataset(self, variables: List[Variable]) -> Dataset: - """Create a Dataset with the given variables and metadata.""" - return Dataset(variables=variables, metadata=self._compose_metadata()) + +SCHEMA_TEMPLATE_MAP = { + MDIOSchemaType.SEISMIC_3D_POST_STACK_GENERIC: Seismic3DPostStackGeneric(), + MDIOSchemaType.SEISMIC_3D_POST_STACK_TIME: Seismic3DPostStack("time"), + MDIOSchemaType.SEISMIC_3D_POST_STACK_DEPTH: Seismic3DPostStack("depth"), + MDIOSchemaType.SEISMIC_3D_PRE_STACK_CDP_TIME: Seismic3DPreStack("time"), + MDIOSchemaType.SEISMIC_3D_PRE_STACK_CDP_DEPTH: Seismic3DPreStack("depth"), +} diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index c091aeba..dca6118a 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -5,93 +5,103 @@ from datetime import datetime from datetime import timezone +import numpy as np import pytest +import xarray as xr from pydantic import ValidationError +from zarr import Array from mdio.core.v1._serializer import make_coordinate from mdio.core.v1._serializer import make_dataset from mdio.core.v1._serializer import make_dataset_metadata from mdio.core.v1._serializer import make_named_dimension from mdio.core.v1._serializer import make_variable +from mdio.core.v1.factory import MDIOSchemaType +from mdio.core.v1.factory import SCHEMA_TEMPLATE_MAP from mdio.schema.compressors import ZFP from mdio.schema.compressors import Blosc from mdio.schema.dimension import NamedDimension from mdio.schema.dtype import ScalarType from mdio.schema.dtype import 
StructuredType - - -def test_make_named_dimension(): - """Test that make_named_dimension returns a NamedDimension object.""" - dim = make_named_dimension("time", 42) - assert isinstance(dim, NamedDimension) - assert dim.name == "time" - assert dim.size == 42 - - -def test_make_coordinate_minimal(): - """Test that make_coordinate returns a Coordinate object.""" - dims = ["x"] - coord = make_coordinate(name="x", dimensions=dims, data_type=ScalarType.FLOAT32) - assert coord.name == "x" - assert coord.dimensions == dims - assert coord.data_type == ScalarType.FLOAT32 - assert coord.metadata is None - - -def test_make_variable_minimal(): - """Test that make_variable returns a Variable object.""" - var = make_variable( - name="var", - dimensions=["x"], - data_type=ScalarType.FLOAT32, - compressor=None, - ) - assert var.name == "var" - assert var.dimensions == ["x"] - assert var.data_type == ScalarType.FLOAT32 - assert var.compressor is None - assert var.coordinates is None - assert var.metadata is None - - -def test_make_dataset_metadata_minimal(): - """Test that make_dataset_metadata returns a DatasetMetadata object.""" - ts = datetime.now(timezone.utc) - meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) - assert meta.name == "ds" - assert meta.api_version == "1" - assert meta.created_on == ts - assert meta.attributes is None - - -def test_make_dataset_minimal(): - """Test that make_dataset returns a Dataset object.""" - var = make_variable( - name="var", - dimensions=["x"], - data_type=ScalarType.FLOAT32, - compressor=None, - ) - ts = datetime.now(timezone.utc) - meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) - ds = make_dataset([var], meta) - assert ds.variables == [var] - assert ds.metadata == meta +from mdio.schema.v1.dataset import Dataset +from mdio.core.v1.builder import write_mdio_metadata + +# def test_make_named_dimension(): +# """Test that make_named_dimension returns a NamedDimension object.""" +# dim = 
make_named_dimension("time", 42) +# assert isinstance(dim, NamedDimension) +# assert dim.name == "time" +# assert dim.size == 42 + + +# def test_make_coordinate_minimal(): +# """Test that make_coordinate returns a Coordinate object.""" +# dims = ["x"] +# coord = make_coordinate(name="x", dimensions=dims, data_type=ScalarType.FLOAT32) +# assert coord.name == "x" +# assert coord.dimensions == dims +# assert coord.data_type == ScalarType.FLOAT32 +# assert coord.metadata is None + + +# def test_make_variable_minimal(): +# """Test that make_variable returns a Variable object.""" +# var = make_variable( +# name="var", +# dimensions=["x"], +# data_type=ScalarType.FLOAT32, +# compressor=None, +# ) +# assert var.name == "var" +# assert var.dimensions == ["x"] +# assert var.data_type == ScalarType.FLOAT32 +# assert var.compressor is None +# assert var.coordinates is None +# assert var.metadata is None + + +# def test_make_dataset_metadata_minimal(): +# """Test that make_dataset_metadata returns a DatasetMetadata object.""" +# ts = datetime.now(timezone.utc) +# meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) +# assert meta.name == "ds" +# assert meta.api_version == "1" +# assert meta.created_on == ts +# assert meta.attributes is None + + +# def test_make_dataset_minimal(): +# """Test that make_dataset returns a Dataset object.""" +# var = make_variable( +# name="var", +# dimensions=["x"], +# data_type=ScalarType.FLOAT32, +# compressor=None, +# ) +# ts = datetime.now(timezone.utc) +# meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) +# ds = make_dataset([var], meta) +# assert ds.variables == [var] +# assert ds.metadata == meta def test_make_toy_dataset(): - """Test that make_toy_dataset returns a Dataset object.""" - # Define core dimensions - inline = make_named_dimension("inline", 256) - crossline = make_named_dimension("crossline", 512) - depth = make_named_dimension("depth", 384) - - # Create dataset metadata - created = 
datetime.fromisoformat("2023-12-12T15:02:06.413469-06:00") - meta = make_dataset_metadata( + """Test that make_toy_dataset returns a Dataset object using the factory pattern.""" + # Create dataset using factory + template = SCHEMA_TEMPLATE_MAP[MDIOSchemaType.SEISMIC_3D_POST_STACK_GENERIC] + ds = template.create( name="campos_3d", - api_version="1.0.0", - created_on=created, + shape=[256, 512, 384], # inline, crossline, time + header_fields={ + "cdp-x": "int32", + "cdp-y": "int32", + "elevation": "float16", + "some_scalar": "float16", + }, + create_coords=True, + sample_format="float32", + chunks=[128, 128, 128], + z_units={"unitsV1": {"time": "ms"}}, attributes={ "textHeader": [ "C01 .......................... ", @@ -102,142 +112,62 @@ def test_make_toy_dataset(): }, ) - # Image variable - image = make_variable( - name="image", - dimensions=[inline, crossline, depth], - data_type=ScalarType.FLOAT32, - compressor=Blosc(algorithm="zstd"), - coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], - metadata={ - "chunkGrid": { - "name": "regular", - "configuration": {"chunkShape": [128, 128, 128]}, - }, - "statsV1": { - "count": 100, - "sum": 1215.1, - "sumSquares": 125.12, - "min": 5.61, - "max": 10.84, - "histogram": {"binCenters": [1, 2], "counts": [10, 15]}, - }, - "attributes": {"fizz": "buzz"}, - }, - ) - - # Velocity variable - velocity = make_variable( - name="velocity", - dimensions=[inline, crossline, depth], - data_type=ScalarType.FLOAT16, - compressor=None, - coordinates=["inline", "crossline", "depth", "cdp-x", "cdp-y"], - metadata={ - "chunkGrid": { - "name": "regular", - "configuration": {"chunkShape": [128, 128, 128]}, - }, - "unitsV1": {"speed": "m/s"}, - }, - ) - - # Inline-optimized image variable - image_inline = make_variable( - name="image_inline", - dimensions=[inline, crossline, depth], - data_type=ScalarType.FLOAT32, - compressor=ZFP(mode="fixed_accuracy", tolerance=0.05), - coordinates=["inline", "crossline", "depth", "cdp-x", 
"cdp-y"], - metadata={ - "chunkGrid": { - "name": "regular", - "configuration": {"chunkShape": [4, 512, 512]}, - } - }, - ) - - # Headers variable with structured dtype - headers_dtype = StructuredType( - fields=[ - {"name": "cdp-x", "format": ScalarType.INT32}, - {"name": "cdp-y", "format": ScalarType.INT32}, - {"name": "elevation", "format": ScalarType.FLOAT16}, - {"name": "some_scalar", "format": ScalarType.FLOAT16}, - ] - ) - image_headers = make_variable( - name="image_headers", - dimensions=[inline, crossline], - data_type=headers_dtype, - compressor=None, - coordinates=["inline", "crossline", "cdp-x", "cdp-y"], - metadata={ - "chunkGrid": { - "name": "regular", - "configuration": {"chunkShape": [128, 128]}, - } - }, - ) + # Print the JSON representation of the dataset schema + print("\nDataset Schema JSON:") + print(ds.model_dump_json(indent=2)) - # Standalone dimension arrays - # Tests that we don't need to pass a compressor. - inline_var = make_variable( - name="inline", dimensions=[inline], data_type=ScalarType.UINT32 - ) - # Tests that we can still pass it explicitly. 
- crossline_var = make_variable( - name="crossline", - dimensions=[crossline], - data_type=ScalarType.UINT32, - compressor=None, - ) - depth_var = make_variable( - name="depth", - dimensions=[depth], - data_type=ScalarType.UINT32, - metadata={"unitsV1": {"length": "m"}}, - ) - cdp_x = make_variable( - name="cdp-x", - dimensions=[inline, crossline], - data_type=ScalarType.FLOAT32, - metadata={"unitsV1": {"length": "m"}}, - ) - cdp_y = make_variable( - name="cdp-y", - dimensions=[inline, crossline], - data_type=ScalarType.FLOAT32, - metadata={"unitsV1": {"length": "m"}}, - ) + write_mdio_metadata(ds, "test_toy_dataset.mdio") - # Compose full dataset - ds = make_dataset( - [ - image, - velocity, - image_inline, - image_headers, - inline_var, - crossline_var, - depth_var, - cdp_x, - cdp_y, + # Verify metadata + assert ds.metadata.name == "campos_3d" + assert ds.metadata.api_version == "1.0.0" + assert ds.metadata.attributes == { + "textHeader": [ + "C01 .......................... ", + "C02 .......................... ", + "C03 .......................... 
", ], - meta, - ) - - assert ds.metadata == meta - assert len(ds.variables) == 9 - assert ds.variables[0] == image - assert ds.variables[1] == velocity - assert ds.variables[2] == image_inline - assert ds.variables[3] == image_headers - assert ds.variables[4] == inline_var - assert ds.variables[5] == crossline_var - assert ds.variables[6] == depth_var - assert ds.variables[7] == cdp_x - assert ds.variables[8] == cdp_y + "foo": "bar", + } + + # Verify variables + assert len(ds.variables) == 8 # seismic, headers, trace_mask, cdp-x, cdp-y + + # Find seismic variable + seismic = next(v for v in ds.variables if v.name == "seismic") + assert seismic.data_type == ScalarType.FLOAT32 + assert seismic.dimensions[0].name == "inline" + assert seismic.dimensions[1].name == "crossline" + assert seismic.dimensions[2].name == "sample" + assert seismic.compressor == Blosc(name="blosc", algorithm="zstd") + + # Find headers variable + headers = next(v for v in ds.variables if v.name == "headers") + assert isinstance(headers.data_type, StructuredType) + assert len(headers.data_type.fields) == 4 + assert headers.dimensions[0].name == "inline" + assert headers.dimensions[1].name == "crossline" + assert headers.compressor == Blosc(name="blosc") + + # Find trace mask + mask = next(v for v in ds.variables if v.name == "trace_mask") + assert mask.data_type == ScalarType.BOOL + assert mask.dimensions[0].name == "inline" + assert mask.dimensions[1].name == "crossline" + assert mask.compressor == Blosc(name="blosc") + + # Find coordinates + cdp_x = next(v for v in ds.variables if v.name == "cdp-x") + assert cdp_x.data_type == ScalarType.FLOAT64 + assert cdp_x.dimensions[0].name == "inline" + assert cdp_x.dimensions[1].name == "crossline" + assert cdp_x.metadata.units_v1.length == "m" + + cdp_y = next(v for v in ds.variables if v.name == "cdp-y") + assert cdp_y.data_type == ScalarType.FLOAT64 + assert cdp_y.dimensions[0].name == "inline" + assert cdp_y.dimensions[1].name == "crossline" + assert 
cdp_y.metadata.units_v1.length == "m" def test_named_dimension_invalid_size(): From a06f8b003cbf1d5510d918bda58a9e8c29bc0dbd Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 7 May 2025 13:42:27 +0000 Subject: [PATCH 23/55] Linting and cleanup --- src/mdio/core/v1/__init__.py | 15 +++-- src/mdio/core/v1/_serializer.py | 7 ++- src/mdio/core/v1/builder.py | 20 +++---- src/mdio/core/v1/factory.py | 64 ++++++++++---------- tests/integration/test_v1_serialization.py | 2 +- tests/unit/test_template_factory.py | 70 +--------------------- 6 files changed, 55 insertions(+), 123 deletions(-) diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index 62ff4b60..f5c4a023 100644 --- a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -4,17 +4,16 @@ """ from ._overloads import mdio +from ._serializer import make_coordinate +from ._serializer import make_dataset +from ._serializer import make_dataset_metadata +from ._serializer import make_named_dimension +from ._serializer import make_variable from .builder import MDIODatasetBuilder from .builder import write_mdio_metadata -from ._serializer import ( - make_coordinate, - make_dataset, - make_dataset_metadata, - make_named_dimension, - make_variable, -) -from .factory import MDIOSchemaType from .factory import SCHEMA_TEMPLATE_MAP +from .factory import MDIOSchemaType + __all__ = [ "MDIODatasetBuilder", diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index dc859eb6..f177ad15 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -4,11 +4,14 @@ to Zarr storage. This API is not considered stable and may change without notice. 
""" -from typing import Any, Dict, List, Optional +from datetime import datetime +from typing import Any +from typing import Dict +from typing import List +from typing import Optional import numpy as np from numcodecs import Blosc as NumcodecsBlosc -from datetime import datetime from mdio.core.v1._overloads import mdio from mdio.schema.compressors import ZFP diff --git a/src/mdio/core/v1/builder.py b/src/mdio/core/v1/builder.py index 03651730..fe4e5d7d 100644 --- a/src/mdio/core/v1/builder.py +++ b/src/mdio/core/v1/builder.py @@ -11,6 +11,7 @@ from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding # noqa: F401 +from mdio.core.v1._overloads import mdio from mdio.schema.compressors import ZFP from mdio.schema.compressors import Blosc from mdio.schema.dimension import NamedDimension @@ -18,20 +19,19 @@ from mdio.schema.dtype import StructuredType from mdio.schema.metadata import UserAttributes from mdio.schema.v1.dataset import Dataset +from mdio.schema.v1.units import AllUnits +from mdio.schema.v1.variable import Coordinate +from mdio.schema.v1.variable import Variable +from mdio.schema.v1.variable import VariableMetadata # Import factory functions from serializer module +from ._serializer import _construct_mdio_dataset +from ._serializer import _convert_compressor from ._serializer import make_coordinate from ._serializer import make_dataset from ._serializer import make_dataset_metadata from ._serializer import make_named_dimension from ._serializer import make_variable -from ._serializer import _convert_compressor -from ._serializer import _construct_mdio_dataset -from mdio.core.v1._overloads import mdio -from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.variable import Coordinate -from mdio.schema.v1.variable import Variable -from mdio.schema.v1.variable import VariableMetadata class _BuilderState(Enum): @@ -227,9 +227,7 @@ def build(self) -> Dataset: return make_dataset(all_variables, metadata) -def write_mdio_metadata( - mdio_ds: Dataset, 
store: str, **kwargs: Any -) -> mdio.Dataset: +def write_mdio_metadata(mdio_ds: Dataset, store: str, **kwargs: Any) -> mdio.Dataset: """Write MDIO metadata to a Zarr store and return the constructed mdio.Dataset. This function constructs an mdio.Dataset from the MDIO dataset and writes its metadata @@ -238,7 +236,7 @@ def write_mdio_metadata( Args: mdio_ds: The MDIO dataset to serialize store: Path to the Zarr store - **kwargs: Additional arguments to pass to to_zarr() + kwargs: Additional arguments to pass to to_mdio() Returns: The constructed xarray Dataset with MDIO extensions diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py index 67085072..fe9984c4 100644 --- a/src/mdio/core/v1/factory.py +++ b/src/mdio/core/v1/factory.py @@ -1,24 +1,18 @@ """MDIO factories for seismic data.""" +# TODO(BrianMichell): Add implementations for other canonical datasets. + from __future__ import annotations -import importlib -from datetime import UTC -from datetime import datetime from enum import Enum from enum import auto from typing import Any -from typing import Dict -from typing import List -from typing import Optional from mdio.core.v1.builder import MDIODatasetBuilder from mdio.schema.compressors import Blosc from mdio.schema.dtype import ScalarType from mdio.schema.dtype import StructuredType from mdio.schema.v1.dataset import Dataset -from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.units import LengthUnitModel class MDIOSchemaType(Enum): @@ -46,14 +40,14 @@ def __init__(self): def create( self, name: str, - shape: List[int], - header_fields: Dict[str, str], + shape: list[int], + header_fields: dict[str, str], create_coords: bool = False, - sample_format: Optional[str] = None, - chunks: Optional[List[int]] = None, - sample_units: Optional[Dict[str, str]] = None, - z_units: Optional[Dict[str, str]] = None, - attributes: Optional[Dict[str, Any]] = None, + sample_format: str | None = None, + chunks: list[int] | None = None, + sample_units: 
dict[str, str] | None = None, + z_units: dict[str, str] | None = None, + attributes: dict[str, Any] | None = None, ) -> Dataset: """Create a generic seismic dataset schema. @@ -80,7 +74,7 @@ def create( ) # Add dimensions - for dim_name, dim_size in zip(self._dim_names, shape): + for dim_name, dim_size in zip(self._dim_names, shape, strict=True): builder.add_dimension( name=dim_name, size=dim_size, @@ -108,10 +102,12 @@ def create( ) # Add header variable with structured dtype - header_dtype = StructuredType(fields=[ - {"name": field_name, "format": field_type} - for field_name, field_type in header_fields.items() - ]) + header_dtype = StructuredType( + fields=[ + {"name": field_name, "format": field_type} + for field_name, field_type in header_fields.items() + ] + ) builder.add_variable( name="headers", data_type=header_dtype, @@ -145,14 +141,14 @@ def __init__(self, domain: str): def create( self, name: str, - shape: List[int], - header_fields: Dict[str, str], + shape: list[int], + header_fields: dict[str, str], create_coords: bool = False, - sample_format: Optional[str] = None, - chunks: Optional[List[int]] = None, - sample_units: Optional[Dict[str, str]] = None, - z_units: Optional[Dict[str, str]] = None, - attributes: Optional[Dict[str, Any]] = None, + sample_format: str | None = None, + chunks: list[int] | None = None, + sample_units: dict[str, str] | None = None, + z_units: dict[str, str] | None = None, + attributes: dict[str, Any] | None = None, ) -> Dataset: """Create a seismic dataset schema with domain-specific attributes.""" # Add seismic-specific attributes @@ -197,14 +193,14 @@ def __init__(self, domain: str): def create( self, name: str, - shape: List[int], - header_fields: Dict[str, str], + shape: list[int], + header_fields: dict[str, str], create_coords: bool = False, - sample_format: Optional[str] = None, - chunks: Optional[List[int]] = None, - sample_units: Optional[Dict[str, str]] = None, - z_units: Optional[Dict[str, str]] = None, - attributes: 
Optional[Dict[str, Any]] = None, + sample_format: str | None = None, + chunks: list[int] | None = None, + sample_units: dict[str, str] | None = None, + z_units: dict[str, str] | None = None, + attributes: dict[str, Any] | None = None, ) -> Dataset: """Create a seismic dataset schema with pre-stack attributes.""" # Add seismic-specific attributes diff --git a/tests/integration/test_v1_serialization.py b/tests/integration/test_v1_serialization.py index 3921d785..3d117345 100644 --- a/tests/integration/test_v1_serialization.py +++ b/tests/integration/test_v1_serialization.py @@ -4,11 +4,11 @@ import numpy as np -from mdio.core.v1.builder import write_mdio_metadata from mdio.core.v1._serializer import make_dataset from mdio.core.v1._serializer import make_dataset_metadata from mdio.core.v1._serializer import make_named_dimension from mdio.core.v1._serializer import make_variable +from mdio.core.v1.builder import write_mdio_metadata from mdio.schema.compressors import ZFP from mdio.schema.compressors import Blosc from mdio.schema.dtype import ScalarType diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index dca6118a..36e796af 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -1,88 +1,24 @@ """Unit tests for MDIO v1 factory.""" -# TODO(BrianMichell): Update this to use canonical factory functions. +# TODO(BrianMichell): Update this to use canonical factory functions. 
from datetime import datetime from datetime import timezone -import numpy as np import pytest -import xarray as xr from pydantic import ValidationError -from zarr import Array from mdio.core.v1._serializer import make_coordinate from mdio.core.v1._serializer import make_dataset from mdio.core.v1._serializer import make_dataset_metadata from mdio.core.v1._serializer import make_named_dimension from mdio.core.v1._serializer import make_variable -from mdio.core.v1.factory import MDIOSchemaType +from mdio.core.v1.builder import write_mdio_metadata from mdio.core.v1.factory import SCHEMA_TEMPLATE_MAP -from mdio.schema.compressors import ZFP +from mdio.core.v1.factory import MDIOSchemaType from mdio.schema.compressors import Blosc -from mdio.schema.dimension import NamedDimension from mdio.schema.dtype import ScalarType from mdio.schema.dtype import StructuredType -from mdio.schema.v1.dataset import Dataset -from mdio.core.v1.builder import write_mdio_metadata - -# def test_make_named_dimension(): -# """Test that make_named_dimension returns a NamedDimension object.""" -# dim = make_named_dimension("time", 42) -# assert isinstance(dim, NamedDimension) -# assert dim.name == "time" -# assert dim.size == 42 - - -# def test_make_coordinate_minimal(): -# """Test that make_coordinate returns a Coordinate object.""" -# dims = ["x"] -# coord = make_coordinate(name="x", dimensions=dims, data_type=ScalarType.FLOAT32) -# assert coord.name == "x" -# assert coord.dimensions == dims -# assert coord.data_type == ScalarType.FLOAT32 -# assert coord.metadata is None - - -# def test_make_variable_minimal(): -# """Test that make_variable returns a Variable object.""" -# var = make_variable( -# name="var", -# dimensions=["x"], -# data_type=ScalarType.FLOAT32, -# compressor=None, -# ) -# assert var.name == "var" -# assert var.dimensions == ["x"] -# assert var.data_type == ScalarType.FLOAT32 -# assert var.compressor is None -# assert var.coordinates is None -# assert var.metadata is None - - 
-# def test_make_dataset_metadata_minimal(): -# """Test that make_dataset_metadata returns a DatasetMetadata object.""" -# ts = datetime.now(timezone.utc) -# meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) -# assert meta.name == "ds" -# assert meta.api_version == "1" -# assert meta.created_on == ts -# assert meta.attributes is None - - -# def test_make_dataset_minimal(): -# """Test that make_dataset returns a Dataset object.""" -# var = make_variable( -# name="var", -# dimensions=["x"], -# data_type=ScalarType.FLOAT32, -# compressor=None, -# ) -# ts = datetime.now(timezone.utc) -# meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) -# ds = make_dataset([var], meta) -# assert ds.variables == [var] -# assert ds.metadata == meta def test_make_toy_dataset(): From b856f583f2286db0ae3ae18e56078ace8be21c1c Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 7 May 2025 13:47:45 +0000 Subject: [PATCH 24/55] Fix factory default dtype --- src/mdio/core/v1/factory.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py index fe9984c4..1af5f799 100644 --- a/src/mdio/core/v1/factory.py +++ b/src/mdio/core/v1/factory.py @@ -33,8 +33,8 @@ def __init__(self): self._dim_names = ["inline", "crossline", "sample"] self._chunks = [128, 128, 128] # 8 mb self._coords = { - "cdp-x": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]), - "cdp-y": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]), + "cdp-x": ("float64", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]), + "cdp-y": ("float64", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]), } def create( @@ -186,8 +186,8 @@ def __init__(self, domain: str): self._dim_names = ["inline", "crossline", "offset", domain] self._chunks = [1, 1, 512, 4096] # 8 mb self._coords = { - "cdp-x": ("float32", {"length": "m"}, self._dim_names[:-2]), - "cdp-y": ("float32", {"length": "m"}, 
self._dim_names[:-2]), + "cdp-x": ("float64", {"length": "m"}, self._dim_names[:-2]), + "cdp-y": ("float64", {"length": "m"}, self._dim_names[:-2]), } def create( From 27879786bdecdc56000abca2033dfc199d9c9634 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 7 May 2025 16:17:05 +0000 Subject: [PATCH 25/55] Fix for renamed directory --- src/mdio/core/v1/_serializer.py | 24 +- src/mdio/core/v1/builder.py | 22 +- src/mdio/core/v1/factory.py | 8 +- src/mdio/schemas/v1/template_builder.py | 224 ----------------- src/mdio/schemas/v1/template_factory.py | 227 ------------------ tests/integration/test_v1_serialization.py | 8 +- tests/unit/schema/v1/test_template_builder.py | 8 +- tests/unit/test_template_factory.py | 6 +- 8 files changed, 38 insertions(+), 489 deletions(-) delete mode 100644 src/mdio/schemas/v1/template_builder.py delete mode 100644 src/mdio/schemas/v1/template_factory.py diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index f177ad15..0b12fc24 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -14,18 +14,18 @@ from numcodecs import Blosc as NumcodecsBlosc from mdio.core.v1._overloads import mdio -from mdio.schema.compressors import ZFP -from mdio.schema.compressors import Blosc -from mdio.schema.dimension import NamedDimension -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType -from mdio.schema.metadata import UserAttributes -from mdio.schema.v1.dataset import Dataset as MDIODataset -from mdio.schema.v1.dataset import DatasetMetadata -from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.variable import Coordinate -from mdio.schema.v1.variable import Variable -from mdio.schema.v1.variable import VariableMetadata +from mdio.schemas.compressors import ZFP +from mdio.schemas.compressors import Blosc +from mdio.schemas.dimension import NamedDimension +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from 
mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.dataset import Dataset as MDIODataset +from mdio.schemas.v1.dataset import DatasetMetadata +from mdio.schemas.v1.units import AllUnits +from mdio.schemas.v1.variable import Coordinate +from mdio.schemas.v1.variable import Variable +from mdio.schemas.v1.variable import VariableMetadata try: diff --git a/src/mdio/core/v1/builder.py b/src/mdio/core/v1/builder.py index fe4e5d7d..421cffac 100644 --- a/src/mdio/core/v1/builder.py +++ b/src/mdio/core/v1/builder.py @@ -12,17 +12,17 @@ from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding # noqa: F401 from mdio.core.v1._overloads import mdio -from mdio.schema.compressors import ZFP -from mdio.schema.compressors import Blosc -from mdio.schema.dimension import NamedDimension -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType -from mdio.schema.metadata import UserAttributes -from mdio.schema.v1.dataset import Dataset -from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.variable import Coordinate -from mdio.schema.v1.variable import Variable -from mdio.schema.v1.variable import VariableMetadata +from mdio.schemas.compressors import ZFP +from mdio.schemas.compressors import Blosc +from mdio.schemas.dimension import NamedDimension +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.units import AllUnits +from mdio.schemas.v1.variable import Coordinate +from mdio.schemas.v1.variable import Variable +from mdio.schemas.v1.variable import VariableMetadata # Import factory functions from serializer module from ._serializer import _construct_mdio_dataset diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py index 1af5f799..6ce8fa48 100644 --- a/src/mdio/core/v1/factory.py +++ b/src/mdio/core/v1/factory.py @@ -9,10 +9,10 @@ from typing import Any 
from mdio.core.v1.builder import MDIODatasetBuilder -from mdio.schema.compressors import Blosc -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType -from mdio.schema.v1.dataset import Dataset +from mdio.schemas.compressors import Blosc +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.v1.dataset import Dataset class MDIOSchemaType(Enum): diff --git a/src/mdio/schemas/v1/template_builder.py b/src/mdio/schemas/v1/template_builder.py deleted file mode 100644 index e5fad4a1..00000000 --- a/src/mdio/schemas/v1/template_builder.py +++ /dev/null @@ -1,224 +0,0 @@ -"""Builder pattern implementation for MDIO v1 schema models.""" - -from datetime import datetime -from datetime import timezone -from enum import Enum -from enum import auto -from typing import Any -from typing import Dict -from typing import List -from typing import Optional - -from mdio.schema.compressors import ZFP -from mdio.schema.compressors import Blosc -from mdio.schema.dimension import NamedDimension -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType -from mdio.schema.metadata import UserAttributes -from mdio.schema.v1.dataset import Dataset - -# from mdio.schema.v1.dataset import DatasetMetadata -from .factory import make_coordinate -from .factory import make_dataset -from .factory import make_dataset_metadata -from .factory import make_named_dimension -from .factory import make_variable -from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.variable import Coordinate -from mdio.schema.v1.variable import Variable -from mdio.schema.v1.variable import VariableMetadata - - -# from pydantic import AwareDatetime - - -class _BuilderState(Enum): - """States for the template builder.""" - - INITIAL = auto() - HAS_DIMENSIONS = auto() - HAS_COORDINATES = auto() - HAS_VARIABLES = auto() - - -class Builder: - """Builder for creating MDIO datasets with enforced build order. 
- - The build order is: - 1. Must add dimensions first via add_dimension() - 2. Can optionally add coordinates via add_coordinate() - 3. Must add variables via add_variable() - 4. Must call build() to create the dataset. - """ - - def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None): - """Initialize the builder. - - Args: - name: Name of the dataset - attributes: Optional attributes for the dataset - """ - self.name = name - self.api_version = "1.0.0" # TODO: Pull from package metadata - self.created_on = datetime.now(timezone.utc) - self.attributes = attributes - self._dimensions: List[NamedDimension] = [] - self._coordinates: List[Coordinate] = [] - self._variables: List[Variable] = [] - self._state = _BuilderState.INITIAL - self._unnamed_variable_counter = 0 - - def add_dimension( - self, - name: str, - size: int, - long_name: str = None, - data_type: ScalarType | StructuredType = ScalarType.INT32, - metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, - ) -> "Builder": - """Add a dimension. - - This must be called at least once before adding coordinates or variables. 
- - Args: - name: Name of the dimension - size: Size of the dimension - long_name: Optional long name for the dimension variable - data_type: Data type for the dimension variable (defaults to INT32) - metadata: Optional metadata for the dimension variable - - Returns: - self: Returns self for method chaining - """ - # Create the dimension - dimension = make_named_dimension(name, size) - self._dimensions.append(dimension) - - # Create a variable for the dimension - dim_var = make_variable( - name=name, - long_name=long_name, - dimensions=[dimension], - data_type=data_type, - metadata=metadata, - ) - self._variables.append(dim_var) - - self._state = _BuilderState.HAS_DIMENSIONS - return self - - def add_coordinate( - self, - name: str = "", - *, - long_name: str = None, - dimensions: Optional[List[NamedDimension | str]] = None, - data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, - ) -> "Builder": - """Add a coordinate after adding at least one dimension.""" - if self._state == _BuilderState.INITIAL: - raise ValueError( - "Must add at least one dimension before adding coordinates" - ) - - if name == "": - name = f"coord_{len(self._coordinates)}" - if dimensions is None: - dimensions = self._dimensions - if isinstance(metadata, dict): - metadata = [metadata] - - # Convert string dimension names to NamedDimension objects - dim_objects = [] - for dim in dimensions: - if isinstance(dim, str): - dim_obj = next((d for d in self._dimensions if d.name == dim), None) - if dim_obj is None: - raise ValueError(f"Dimension {dim!r} not found") - dim_objects.append(dim_obj) - else: - dim_objects.append(dim) - - self._coordinates.append( - make_coordinate( - name=name, - long_name=long_name, - dimensions=dim_objects, - data_type=data_type, - metadata=metadata, - ) - ) - self._state = _BuilderState.HAS_COORDINATES - return self - - def add_variable( - self, - name: str = "", - *, - long_name: str 
= None, - dimensions: Optional[List[NamedDimension | str]] = None, - data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - compressor: Blosc | ZFP | None = None, - coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[VariableMetadata] = None, - ) -> "Builder": - """Add a variable after adding at least one dimension.""" - if self._state == _BuilderState.INITIAL: - raise ValueError("Must add at least one dimension before adding variables") - - if name == "": - name = f"var_{self._unnamed_variable_counter}" - self._unnamed_variable_counter += 1 - if dimensions is None: - dimensions = self._dimensions - - # Convert string dimension names to NamedDimension objects - dim_objects = [] - for dim in dimensions: - if isinstance(dim, str): - dim_obj = next((d for d in self._dimensions if d.name == dim), None) - if dim_obj is None: - raise ValueError(f"Dimension {dim!r} not found") - dim_objects.append(dim_obj) - else: - dim_objects.append(dim) - - self._variables.append( - make_variable( - name=name, - long_name=long_name, - dimensions=dim_objects, - data_type=data_type, - compressor=compressor, - coordinates=coordinates, - metadata=metadata, - ) - ) - self._state = _BuilderState.HAS_VARIABLES - return self - - def build(self) -> Dataset: - """Build the final dataset.""" - if self._state == _BuilderState.INITIAL: - raise ValueError("Must add at least one dimension before building") - - metadata = make_dataset_metadata( - self.name, self.api_version, self.created_on, self.attributes - ) - - # Add coordinates as variables to the dataset - # We make a copy so that coordinates are not duplicated if the builder is reused - all_variables = self._variables.copy() - for coord in self._coordinates: - # Convert coordinate to variable - coord_var = make_variable( - name=coord.name, - long_name=coord.long_name, - dimensions=coord.dimensions, - data_type=coord.data_type, - metadata=coord.metadata, - ) - all_variables.append(coord_var) - - return 
make_dataset(all_variables, metadata) \ No newline at end of file diff --git a/src/mdio/schemas/v1/template_factory.py b/src/mdio/schemas/v1/template_factory.py deleted file mode 100644 index 7e0f26f8..00000000 --- a/src/mdio/schemas/v1/template_factory.py +++ /dev/null @@ -1,227 +0,0 @@ -"""Factory methods for MDIO v1 schema models.""" - -from datetime import datetime -from datetime import timezone -from typing import Any -from typing import Dict -from typing import List -from typing import Optional - -from mdio.schema.compressors import ZFP -from mdio.schema.compressors import Blosc -from mdio.schema.dimension import NamedDimension -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType -from mdio.schema.metadata import UserAttributes -from mdio.schema.v1.dataset import Dataset -from mdio.schema.v1.dataset import DatasetMetadata -from mdio.schema.v1.units import AllUnits -from mdio.schema.v1.variable import Coordinate -from mdio.schema.v1.variable import Variable -from mdio.schema.v1.variable import VariableMetadata - - -def make_named_dimension(name: str, size: int) -> NamedDimension: - """Create a NamedDimension with the given name and size.""" - return NamedDimension(name=name, size=size) - - -def make_coordinate( - name: str, - dimensions: List[NamedDimension | str], - data_type: ScalarType | StructuredType, - long_name: str = None, - metadata: Optional[List[AllUnits | UserAttributes]] = None, -) -> Coordinate: - """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" - return Coordinate( - name=name, - long_name=long_name, - dimensions=dimensions, - data_type=data_type, - metadata=metadata, - ) - - -def make_variable( # noqa: C901 - name: str, - dimensions: List[NamedDimension | str], - data_type: ScalarType | StructuredType, - long_name: str = None, - compressor: Blosc | ZFP | None = None, - coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[ - List[AllUnits | 
UserAttributes] | Dict[str, Any] | VariableMetadata - ] = None, -) -> Variable: - """Create a Variable with the given parameters. - - Args: - name: Name of the variable - dimensions: List of dimensions - data_type: Data type of the variable - long_name: Optional long name - compressor: Optional compressor - coordinates: Optional list of coordinates - metadata: Optional metadata - - Returns: - Variable: A Variable instance with the specified parameters. - - Raises: - TypeError: If the metadata type is not supported. - """ - # Convert metadata to VariableMetadata if needed - var_metadata = None - if metadata: - if isinstance(metadata, list): - # Convert list of metadata to dict - metadata_dict = {} - for md in metadata: - if isinstance(md, AllUnits): - # For units_v1, if it's a single element, use it directly - if isinstance(md.units_v1, list) and len(md.units_v1) == 1: - metadata_dict["units_v1"] = md.units_v1[0] - else: - metadata_dict["units_v1"] = md.units_v1 - elif isinstance(md, UserAttributes): - # For attributes, if it's a single element, use it directly - attrs = md.model_dump(by_alias=True) - if isinstance(attrs, list) and len(attrs) == 1: - metadata_dict["attributes"] = attrs[0] - else: - metadata_dict["attributes"] = attrs - var_metadata = VariableMetadata(**metadata_dict) - elif isinstance(metadata, dict): - # Convert camelCase keys to snake_case for VariableMetadata - converted_dict = {} - for key, value in metadata.items(): - if key == "unitsV1": - # For units_v1, if it's a single element array, use the element directly - if isinstance(value, list) and len(value) == 1: - converted_dict["units_v1"] = value[0] - else: - converted_dict["units_v1"] = value - else: - converted_dict[key] = value - var_metadata = VariableMetadata(**converted_dict) - elif isinstance(metadata, VariableMetadata): - var_metadata = metadata - else: - raise TypeError(f"Unsupported metadata type: {type(metadata)}") - - # Create the variable with all attributes explicitly set - 
return Variable( - name=name, - long_name=long_name, - dimensions=dimensions, - data_type=data_type, - compressor=compressor, - coordinates=coordinates, - metadata=var_metadata, - ) - - -def make_dataset_metadata( - name: str, - api_version: str, - created_on: datetime, - attributes: Optional[Dict[str, Any]] = None, -) -> DatasetMetadata: - """Create a DatasetMetadata with name, api_version, created_on, and optional attributes.""" - return DatasetMetadata( - name=name, - api_version=api_version, - created_on=created_on, - attributes=attributes, - ) - - -def make_dataset( - variables: List[Variable], - metadata: DatasetMetadata, -) -> Dataset: - """Create a Dataset with the given variables and metadata.""" - return Dataset( - variables=variables, - metadata=metadata, - ) - - -class AbstractTemplateFactory: - """Abstract factory for creating MDIO datasets.""" - - def __init__(self, name: str): - """Initialize the factory. - - Args: - name: Name of the dataset - """ - self.name = name - self.api_version = "1.0.0" # TODO: Pull from package metadata - self.created_on = datetime.now(timezone.utc) - self.dimensions: List[NamedDimension] = [] - self.coordinates: List[Coordinate] = [] - self.variables: List[Variable] = [] - - def add_dimension(self, name: str, size: int) -> "AbstractTemplateFactory": - """Add a dimension to the factory.""" - self.dimensions.append(make_named_dimension(name, size)) - return self - - def add_coordinate( - self, - name: str = "", - dimensions: Optional[List[NamedDimension | str]] = None, - data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - metadata: Optional[List[AllUnits | UserAttributes]] = None, - ) -> "AbstractTemplateFactory": - """Add a coordinate to the factory.""" - if name == "": - name = f"coord_{len(self.coordinates)}" - if dimensions is None: - dimensions = self.dimensions - self.coordinates.append(make_coordinate(name, dimensions, data_type, metadata)) - return self - - def add_variable( - self, - name: str = "", - 
dimensions: Optional[List[NamedDimension | str]] = None, - data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - compressor: Blosc | ZFP | None = None, - coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[VariableMetadata] = None, - ) -> "AbstractTemplateFactory": - """Add a variable to the factory.""" - if name == "": - name = f"var_{len(self.variables)}" - if dimensions is None: - dimensions = self.dimensions - self.variables.append( - make_variable( - name, dimensions, data_type, compressor, coordinates, metadata - ) - ) - return self - - def _compose_metadata(self) -> DatasetMetadata: - """Compose the DatasetMetadata with the given name, api_version, and created_on.""" - return make_dataset_metadata(self.name, self.api_version, self.created_on) - - def _compose_variables(self) -> List[Variable]: - """Compose the Variables with the given parameters.""" - return [ - make_variable( - self.name, - self.dimensions, - self.data_type, - self.compressor, - self.coordinates, - self.metadata, - ) - ] - - def make_dataset(self, variables: List[Variable]) -> Dataset: - """Create a Dataset with the given variables and metadata.""" - return Dataset(variables=variables, metadata=self._compose_metadata()) \ No newline at end of file diff --git a/tests/integration/test_v1_serialization.py b/tests/integration/test_v1_serialization.py index 3d117345..cc104a10 100644 --- a/tests/integration/test_v1_serialization.py +++ b/tests/integration/test_v1_serialization.py @@ -9,10 +9,10 @@ from mdio.core.v1._serializer import make_named_dimension from mdio.core.v1._serializer import make_variable from mdio.core.v1.builder import write_mdio_metadata -from mdio.schema.compressors import ZFP -from mdio.schema.compressors import Blosc -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType +from mdio.schemas.compressors import ZFP +from mdio.schemas.compressors import Blosc +from mdio.schemas.dtype import ScalarType +from 
mdio.schemas.dtype import StructuredType def build_toy_dataset(): diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index a1df28ac..04be1f73 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -7,10 +7,10 @@ from mdio.core.v1.builder import MDIODatasetBuilder from mdio.core.v1.builder import _BuilderState from mdio.core.v1.builder import write_mdio_metadata -from mdio.schema.compressors import Blosc -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType -from mdio.schema.v1.dataset import Dataset +from mdio.schemas.compressors import Blosc +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.v1.dataset import Dataset def test_builder_initialization(): diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index 36e796af..66e5708c 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -16,9 +16,9 @@ from mdio.core.v1.builder import write_mdio_metadata from mdio.core.v1.factory import SCHEMA_TEMPLATE_MAP from mdio.core.v1.factory import MDIOSchemaType -from mdio.schema.compressors import Blosc -from mdio.schema.dtype import ScalarType -from mdio.schema.dtype import StructuredType +from mdio.schemas.compressors import Blosc +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType def test_make_toy_dataset(): From a70f63cede438c46168ece31a056cad56596721b Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 7 May 2025 16:27:46 +0000 Subject: [PATCH 26/55] Linting auto-fixes --- src/mdio/core/v1/__init__.py | 1 - src/mdio/core/v1/_serializer.py | 31 ++++++--------- src/mdio/core/v1/builder.py | 39 +++++++------------ src/mdio/core/v1/factory.py | 11 ------ tests/unit/schema/v1/test_template_builder.py | 16 ++------ tests/unit/test_template_factory.py | 8 ++-- 6 
files changed, 32 insertions(+), 74 deletions(-) diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index f5c4a023..afaa3bac 100644 --- a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -14,7 +14,6 @@ from .factory import SCHEMA_TEMPLATE_MAP from .factory import MDIOSchemaType - __all__ = [ "MDIODatasetBuilder", "make_coordinate", diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index 0b12fc24..fb836392 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -6,9 +6,6 @@ from datetime import datetime from typing import Any -from typing import Dict -from typing import List -from typing import Optional import numpy as np from numcodecs import Blosc as NumcodecsBlosc @@ -27,7 +24,6 @@ from mdio.schemas.v1.variable import Variable from mdio.schemas.v1.variable import VariableMetadata - try: import zfpy as zfpy_base # Base library from numcodecs import ZFPY # Codec @@ -44,10 +40,10 @@ def make_named_dimension(name: str, size: int) -> NamedDimension: def make_coordinate( name: str, - dimensions: List[NamedDimension | str], + dimensions: list[NamedDimension | str], data_type: ScalarType | StructuredType, long_name: str = None, - metadata: Optional[List[AllUnits | UserAttributes]] = None, + metadata: list[AllUnits | UserAttributes] | None = None, ) -> Coordinate: """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" return Coordinate( @@ -61,14 +57,12 @@ def make_coordinate( def make_variable( # noqa: C901 name: str, - dimensions: List[NamedDimension | str], + dimensions: list[NamedDimension | str], data_type: ScalarType | StructuredType, long_name: str = None, compressor: Blosc | ZFP | None = None, - coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[ - List[AllUnits | UserAttributes] | Dict[str, Any] | VariableMetadata - ] = None, + coordinates: list[Coordinate | str] | None = None, + metadata: list[AllUnits | 
UserAttributes] | dict[str, Any] | VariableMetadata | None = None, ) -> Variable: """Create a Variable with the given parameters. @@ -142,7 +136,7 @@ def make_dataset_metadata( name: str, api_version: str, created_on: datetime, - attributes: Optional[Dict[str, Any]] = None, + attributes: dict[str, Any] | None = None, ) -> DatasetMetadata: """Create a DatasetMetadata with name, api_version, created_on, and optional attributes.""" return DatasetMetadata( @@ -154,7 +148,7 @@ def make_dataset_metadata( def make_dataset( - variables: List[Variable], + variables: list[Variable], metadata: DatasetMetadata, ) -> MDIODataset: """Create a Dataset with the given variables and metadata.""" @@ -174,7 +168,7 @@ def _convert_compressor( shuffle=model.shuffle.value, blocksize=model.blocksize if model.blocksize > 0 else 0, ) - elif isinstance(model, ZFP): + if isinstance(model, ZFP): if zfpy_base is None or ZFPY is None: raise ImportError("zfpy and numcodecs are required to use ZFP compression") return ZFPY( @@ -183,10 +177,9 @@ def _convert_compressor( rate=model.rate, precision=model.precision, ) - elif model is None: + if model is None: return None - else: - raise TypeError(f"Unsupported compressor model: {type(model)}") + raise TypeError(f"Unsupported compressor model: {type(model)}") def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: # noqa: C901 @@ -214,9 +207,7 @@ def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: # noqa: C901 # Build data variables data_vars: dict[str, mdio.DataArray] = {} for var in mdio_ds.variables: - dim_names = [ - d.name if isinstance(d, NamedDimension) else d for d in var.dimensions - ] + dim_names = [d.name if isinstance(d, NamedDimension) else d for d in var.dimensions] shape = tuple(dims[name] for name in dim_names) dt = var.data_type if isinstance(dt, ScalarType): diff --git a/src/mdio/core/v1/builder.py b/src/mdio/core/v1/builder.py index 421cffac..4aafbffe 100644 --- a/src/mdio/core/v1/builder.py +++ 
b/src/mdio/core/v1/builder.py @@ -1,13 +1,10 @@ """Builder pattern implementation for MDIO v1 schema models.""" +from datetime import UTC from datetime import datetime -from datetime import timezone from enum import Enum from enum import auto from typing import Any -from typing import Dict -from typing import List -from typing import Optional from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding # noqa: F401 @@ -54,20 +51,14 @@ class MDIODatasetBuilder: 4. Must call build() to create the dataset. """ - def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None): - """Initialize the builder. - - Args: - name: Name of the dataset - attributes: Optional attributes for the dataset - """ + def __init__(self, name: str, attributes: dict[str, Any] | None = None): self.name = name self.api_version = "1.0.0" # TODO: Pull from package metadata - self.created_on = datetime.now(timezone.utc) + self.created_on = datetime.now(UTC) self.attributes = attributes - self._dimensions: List[NamedDimension] = [] - self._coordinates: List[Coordinate] = [] - self._variables: List[Variable] = [] + self._dimensions: list[NamedDimension] = [] + self._coordinates: list[Coordinate] = [] + self._variables: list[Variable] = [] self._state = _BuilderState.INITIAL self._unnamed_variable_counter = 0 @@ -77,7 +68,7 @@ def add_dimension( size: int, long_name: str = None, data_type: ScalarType | StructuredType = ScalarType.INT32, - metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, + metadata: list[AllUnits | UserAttributes] | None | dict[str, Any] = None, ) -> "MDIODatasetBuilder": """Add a dimension. 
@@ -115,15 +106,13 @@ def add_coordinate( name: str = "", *, long_name: str = None, - dimensions: Optional[List[NamedDimension | str]] = None, + dimensions: list[NamedDimension | str] | None = None, data_type: ScalarType | StructuredType = ScalarType.FLOAT32, - metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None, + metadata: list[AllUnits | UserAttributes] | None | dict[str, Any] = None, ) -> "MDIODatasetBuilder": """Add a coordinate after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: - raise ValueError( - "Must add at least one dimension before adding coordinates" - ) + raise ValueError("Must add at least one dimension before adding coordinates") if name == "": name = f"coord_{len(self._coordinates)}" @@ -160,11 +149,11 @@ def add_variable( name: str = "", *, long_name: str = None, - dimensions: Optional[List[NamedDimension | str]] = None, + dimensions: list[NamedDimension | str] | None = None, data_type: ScalarType | StructuredType = ScalarType.FLOAT32, compressor: Blosc | ZFP | None = None, - coordinates: Optional[List[Coordinate | str]] = None, - metadata: Optional[VariableMetadata] = None, + coordinates: list[Coordinate | str] | None = None, + metadata: VariableMetadata | None = None, ) -> "MDIODatasetBuilder": """Add a variable after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: @@ -236,7 +225,7 @@ def write_mdio_metadata(mdio_ds: Dataset, store: str, **kwargs: Any) -> mdio.Dat Args: mdio_ds: The MDIO dataset to serialize store: Path to the Zarr store - kwargs: Additional arguments to pass to to_mdio() + **kwargs: Additional arguments to pass to to_mdio() Returns: The constructed xarray Dataset with MDIO extensions diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py index 6ce8fa48..20a2822e 100644 --- a/src/mdio/core/v1/factory.py +++ b/src/mdio/core/v1/factory.py @@ -29,7 +29,6 @@ class Seismic3DPostStackGeneric: """Generic 3D seismic post stack dataset.""" 
def __init__(self): - """Initialize generic post stack dataset.""" self._dim_names = ["inline", "crossline", "sample"] self._chunks = [128, 128, 128] # 8 mb self._coords = { @@ -130,11 +129,6 @@ class Seismic3DPostStack(Seismic3DPostStackGeneric): """3D seismic post stack dataset with domain-specific attributes.""" def __init__(self, domain: str): - """Initialize post stack dataset. - - Args: - domain: Domain of the dataset (time/depth) - """ super().__init__() self._dim_names = ["inline", "crossline", domain] @@ -177,11 +171,6 @@ class Seismic3DPreStack(Seismic3DPostStackGeneric): """3D seismic pre stack dataset.""" def __init__(self, domain: str): - """Initialize pre stack dataset. - - Args: - domain: Domain of the dataset (time/depth) - """ super().__init__() self._dim_names = ["inline", "crossline", "offset", domain] self._chunks = [1, 1, 512, 4096] # 8 mb diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index 04be1f73..88edc3a6 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -87,9 +87,7 @@ def test_coordinate_builder_state(): builder = builder.add_dimension("y", 200) # Adding coordinate should change state to HAS_COORDINATES - builder = builder.add_coordinate( - "x_coord", dimensions=["x"], long_name="X Coordinate" - ) + builder = builder.add_coordinate("x_coord", dimensions=["x"], long_name="X Coordinate") assert builder._state == _BuilderState.HAS_COORDINATES assert len(builder._coordinates) == 1 assert builder._coordinates[0].name == "x_coord" @@ -109,9 +107,7 @@ def test_variable_builder_state(): builder = MDIODatasetBuilder("test_dataset") # Should not be able to add variables before dimensions - with pytest.raises( - ValueError, match="Must add at least one dimension before adding variables" - ): + with pytest.raises(ValueError, match="Must add at least one dimension before adding variables"): builder.add_variable("data", 
dimensions=["x"]) # Add dimension first @@ -203,15 +199,11 @@ def test_build_order_enforcement(): builder.add_coordinate("x_coord", dimensions=["x"]) # Should not be able to add variables before dimensions - with pytest.raises( - ValueError, match="Must add at least one dimension before adding variables" - ): + with pytest.raises(ValueError, match="Must add at least one dimension before adding variables"): builder.add_variable("data", dimensions=["x"]) # Should not be able to build without dimensions - with pytest.raises( - ValueError, match="Must add at least one dimension before building" - ): + with pytest.raises(ValueError, match="Must add at least one dimension before building"): builder.build() diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index 66e5708c..effb5773 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -2,8 +2,8 @@ # TODO(BrianMichell): Update this to use canonical factory functions. +from datetime import UTC from datetime import datetime -from datetime import timezone import pytest from pydantic import ValidationError @@ -118,9 +118,7 @@ def test_make_coordinate_invalid_types(): """Test that make_coordinate raises a ValidationError for invalid types.""" # dimensions must be a list of NamedDimension or str with pytest.raises(ValidationError): - make_coordinate( - name="coord", dimensions="notalist", data_type=ScalarType.FLOAT32 - ) + make_coordinate(name="coord", dimensions="notalist", data_type=ScalarType.FLOAT32) # data_type must be a valid ScalarType with pytest.raises(ValidationError): make_coordinate(name="coord", dimensions=["x"], data_type="notatype") @@ -156,7 +154,7 @@ def test_make_dataset_metadata_invalid_created_on(): def test_make_dataset_invalid_variables_and_metadata_types(): """Test that make_dataset raises a ValidationError.""" - ts = datetime.now(timezone.utc) + ts = datetime.now(UTC) meta = make_dataset_metadata(name="ds", api_version="1", 
created_on=ts) var = make_variable( name="var", From 3c9f0ec7c132960088ceb3921f44e22ce8232100 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 7 May 2025 18:43:39 +0000 Subject: [PATCH 27/55] Linting --- src/mdio/core/v1/_overloads.py | 28 +++++++-- src/mdio/core/v1/_serializer.py | 16 +++-- src/mdio/core/v1/builder.py | 34 +++++++---- src/mdio/core/v1/factory.py | 19 +++--- tests/integration/test_v1_serialization.py | 6 +- tests/unit/schema/v1/test_template_builder.py | 59 ++++++++++--------- tests/unit/test_template_factory.py | 20 +++---- 7 files changed, 111 insertions(+), 71 deletions(-) diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py index e38eec6a..af2856b8 100644 --- a/src/mdio/core/v1/_overloads.py +++ b/src/mdio/core/v1/_overloads.py @@ -5,6 +5,8 @@ 2. To simplify the API for users where it makes sense (e.g. MDIO v1 uses Zarr and not HDF5). """ +from collections.abc import Mapping + import xarray as xr from xarray import DataArray as _DataArray from xarray import Dataset as _Dataset @@ -15,7 +17,12 @@ class MDIODataset(_Dataset): __slots__ = () - def to_mdio(self, store=None, *args, **kwargs): + def to_mdio( + self, + store: str | None = None, + *args: str | int | float | bool, + **kwargs: Mapping[str, str | int | float | bool], + ) -> None: """Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store.""" print("👋 hello world from mdio.to_mdio!") return super().to_zarr(*args, store=store, **kwargs) @@ -26,7 +33,12 @@ class MDIODataArray(_DataArray): __slots__ = () - def to_mdio(self, store=None, *args, **kwargs): + def to_mdio( + self, + store: str | None = None, + *args: str | int | float | bool, + **kwargs: Mapping[str, str | int | float | bool], + ) -> None: """Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store.""" print("👋 hello world from mdio.to_mdio!") return super().to_zarr(*args, store=store, **kwargs) @@ -39,7 +51,13 @@ class MDIO: DataArray = MDIODataArray @staticmethod - def 
open(store, *args, engine="zarr", consolidated=False, **kwargs): + def open( + store: str, + *args: str | int | float | bool, + engine: str = "zarr", + consolidated: bool = False, + **kwargs: Mapping[str, str | int | float | bool], + ) -> MDIODataset: """Open a Zarr store as an MDIODataset. Casts the returned xarray.Dataset (and its variables) to the MDIO subclasses. @@ -55,9 +73,9 @@ def open(store, *args, engine="zarr", consolidated=False, **kwargs): # Cast Dataset to MDIODataset ds.__class__ = MDIODataset # Cast each DataArray in data_vars and coords - for _name, var in ds.data_vars.items(): + for _name, var in ds.data_vars.values(): var.__class__ = MDIODataArray - for _name, coord in ds.coords.items(): + for _name, coord in ds.coords.values(): coord.__class__ = MDIODataArray return ds diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index fb836392..3a3a2e8e 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -55,7 +55,7 @@ def make_coordinate( ) -def make_variable( # noqa: C901 +def make_variable( # noqa: PLR0913 PLR0912 name: str, dimensions: list[NamedDimension | str], data_type: ScalarType | StructuredType, @@ -118,7 +118,8 @@ def make_variable( # noqa: C901 elif isinstance(metadata, VariableMetadata): var_metadata = metadata else: - raise TypeError(f"Unsupported metadata type: {type(metadata)}") + msg = f"Unsupported metadata type: {type(metadata)}" + raise TypeError(msg) # Create the variable with all attributes explicitly set return Variable( @@ -170,7 +171,8 @@ def _convert_compressor( ) if isinstance(model, ZFP): if zfpy_base is None or ZFPY is None: - raise ImportError("zfpy and numcodecs are required to use ZFP compression") + msg = "zfpy and numcodecs are required to use ZFP compression" + raise ImportError(msg) return ZFPY( mode=model.mode.value, tolerance=model.tolerance, @@ -179,10 +181,11 @@ def _convert_compressor( ) if model is None: return None - raise TypeError(f"Unsupported 
compressor model: {type(model)}") + msg = f"Unsupported compressor model: {type(model)}" + raise TypeError(msg) -def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: # noqa: C901 +def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: # noqa: PLR0912 """Build an MDIO dataset with correct dimensions and dtypes. This internal function constructs the underlying data structure for an MDIO dataset, @@ -215,7 +218,8 @@ def _construct_mdio_dataset(mdio_ds: MDIODataset) -> mdio.Dataset: # noqa: C901 elif isinstance(dt, StructuredType): dtype = np.dtype([(f.name, f.format.value) for f in dt.fields]) else: - raise TypeError(f"Unsupported data_type: {dt}") + msg = f"Unsupported data_type: {dt}" + raise TypeError(msg) arr = np.zeros(shape, dtype=dtype) data_array = mdio.DataArray(arr, dims=dim_names) data_array.encoding["fill_value"] = 0.0 diff --git a/src/mdio/core/v1/builder.py b/src/mdio/core/v1/builder.py index 4aafbffe..124f4e18 100644 --- a/src/mdio/core/v1/builder.py +++ b/src/mdio/core/v1/builder.py @@ -1,5 +1,6 @@ """Builder pattern implementation for MDIO v1 schema models.""" +from collections.abc import Mapping from datetime import UTC from datetime import datetime from enum import Enum @@ -53,7 +54,7 @@ class MDIODatasetBuilder: def __init__(self, name: str, attributes: dict[str, Any] | None = None): self.name = name - self.api_version = "1.0.0" # TODO: Pull from package metadata + self.api_version = "1.0.0" # TODO(BrianMichell, #0): Pull from package metadata self.created_on = datetime.now(UTC) self.attributes = attributes self._dimensions: list[NamedDimension] = [] @@ -62,7 +63,7 @@ def __init__(self, name: str, attributes: dict[str, Any] | None = None): self._state = _BuilderState.INITIAL self._unnamed_variable_counter = 0 - def add_dimension( + def add_dimension( # noqa: PLR0913 self, name: str, size: int, @@ -101,7 +102,7 @@ def add_dimension( self._state = _BuilderState.HAS_DIMENSIONS return self - def add_coordinate( + def 
add_coordinate( # noqa: PLR0913 self, name: str = "", *, @@ -112,7 +113,8 @@ def add_coordinate( ) -> "MDIODatasetBuilder": """Add a coordinate after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: - raise ValueError("Must add at least one dimension before adding coordinates") + msg = "Must add at least one dimension before adding coordinates" + raise ValueError(msg) if name == "": name = f"coord_{len(self._coordinates)}" @@ -127,7 +129,8 @@ def add_coordinate( if isinstance(dim, str): dim_obj = next((d for d in self._dimensions if d.name == dim), None) if dim_obj is None: - raise ValueError(f"Dimension {dim!r} not found") + msg = f"Dimension {dim!r} not found" + raise ValueError(msg) dim_objects.append(dim_obj) else: dim_objects.append(dim) @@ -144,7 +147,7 @@ def add_coordinate( self._state = _BuilderState.HAS_COORDINATES return self - def add_variable( + def add_variable( # noqa: PLR0913 self, name: str = "", *, @@ -157,7 +160,8 @@ def add_variable( ) -> "MDIODatasetBuilder": """Add a variable after adding at least one dimension.""" if self._state == _BuilderState.INITIAL: - raise ValueError("Must add at least one dimension before adding variables") + msg = "Must add at least one dimension before adding variables" + raise ValueError(msg) if name == "": name = f"var_{self._unnamed_variable_counter}" @@ -171,7 +175,8 @@ def add_variable( if isinstance(dim, str): dim_obj = next((d for d in self._dimensions if d.name == dim), None) if dim_obj is None: - raise ValueError(f"Dimension {dim!r} not found") + msg = f"Dimension {dim!r} not found" + raise ValueError(msg) dim_objects.append(dim_obj) else: dim_objects.append(dim) @@ -193,7 +198,8 @@ def add_variable( def build(self) -> Dataset: """Build the final dataset.""" if self._state == _BuilderState.INITIAL: - raise ValueError("Must add at least one dimension before building") + msg = "Must add at least one dimension before building" + raise ValueError(msg) metadata = make_dataset_metadata( 
self.name, self.api_version, self.created_on, self.attributes @@ -216,7 +222,11 @@ def build(self) -> Dataset: return make_dataset(all_variables, metadata) -def write_mdio_metadata(mdio_ds: Dataset, store: str, **kwargs: Any) -> mdio.Dataset: +def write_mdio_metadata( + mdio_ds: Dataset, + store: str, + **kwargs: Mapping[str, str | int | float | bool], +) -> mdio.Dataset: """Write MDIO metadata to a Zarr store and return the constructed mdio.Dataset. This function constructs an mdio.Dataset from the MDIO dataset and writes its metadata @@ -238,7 +248,7 @@ def _generate_encodings() -> dict: Returns: Dictionary mapping variable names to their encoding configurations. """ - # TODO: Re-enable chunk_key_encoding when supported by xarray + # TODO(Anybody, #10274): Re-enable chunk_key_encoding when supported by xarray # dimension_separator_encoding = V2ChunkKeyEncoding(separator="/").to_dict() global_encodings = {} for var in mdio_ds.variables: @@ -250,7 +260,7 @@ def _generate_encodings() -> dict: chunks = var.metadata.chunk_grid.configuration.chunk_shape global_encodings[var.name] = { "chunks": chunks, - # TODO: Re-enable chunk_key_encoding when supported by xarray + # TODO(Anybody, #10274): Re-enable chunk_key_encoding when supported by xarray # "chunk_key_encoding": dimension_separator_encoding, "_FillValue": fill_value, "dtype": var.data_type, diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py index 20a2822e..b4d6ae8c 100644 --- a/src/mdio/core/v1/factory.py +++ b/src/mdio/core/v1/factory.py @@ -1,18 +1,21 @@ """MDIO factories for seismic data.""" -# TODO(BrianMichell): Add implementations for other canonical datasets. +# TODO(BrianMichell, #535): Add implementations for other canonical datasets. 
from __future__ import annotations from enum import Enum from enum import auto +from typing import TYPE_CHECKING from typing import Any from mdio.core.v1.builder import MDIODatasetBuilder from mdio.schemas.compressors import Blosc from mdio.schemas.dtype import ScalarType from mdio.schemas.dtype import StructuredType -from mdio.schemas.v1.dataset import Dataset + +if TYPE_CHECKING: + from mdio.schemas.v1.dataset import Dataset class MDIOSchemaType(Enum): @@ -28,7 +31,7 @@ class MDIOSchemaType(Enum): class Seismic3DPostStackGeneric: """Generic 3D seismic post stack dataset.""" - def __init__(self): + def __init__(self) -> None: self._dim_names = ["inline", "crossline", "sample"] self._chunks = [128, 128, 128] # 8 mb self._coords = { @@ -36,7 +39,7 @@ def __init__(self): "cdp-y": ("float64", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]), } - def create( + def create( # noqa: PLR0913 self, name: str, shape: list[int], @@ -128,11 +131,11 @@ def create( class Seismic3DPostStack(Seismic3DPostStackGeneric): """3D seismic post stack dataset with domain-specific attributes.""" - def __init__(self, domain: str): + def __init__(self, domain: str) -> None: super().__init__() self._dim_names = ["inline", "crossline", domain] - def create( + def create( # noqa: PLR0913 self, name: str, shape: list[int], @@ -170,7 +173,7 @@ def create( class Seismic3DPreStack(Seismic3DPostStackGeneric): """3D seismic pre stack dataset.""" - def __init__(self, domain: str): + def __init__(self, domain: str) -> None: super().__init__() self._dim_names = ["inline", "crossline", "offset", domain] self._chunks = [1, 1, 512, 4096] # 8 mb @@ -179,7 +182,7 @@ def __init__(self, domain: str): "cdp-y": ("float64", {"length": "m"}, self._dim_names[:-2]), } - def create( + def create( # noqa: PLR0913 self, name: str, shape: list[int], diff --git a/tests/integration/test_v1_serialization.py b/tests/integration/test_v1_serialization.py index cc104a10..3b16388c 100644 --- 
a/tests/integration/test_v1_serialization.py +++ b/tests/integration/test_v1_serialization.py @@ -1,9 +1,11 @@ """Integration test for MDIO v1 Xarray Zarr constructor.""" from datetime import datetime +from pathlib import Path import numpy as np +from mdio.core.v1._overloads import MDIODataset from mdio.core.v1._serializer import make_dataset from mdio.core.v1._serializer import make_dataset_metadata from mdio.core.v1._serializer import make_named_dimension @@ -15,7 +17,7 @@ from mdio.schemas.dtype import StructuredType -def build_toy_dataset(): +def build_toy_dataset() -> MDIODataset: """Build a toy dataset for testing.""" # core dimensions inline = make_named_dimension("inline", 256) @@ -165,7 +167,7 @@ def build_toy_dataset(): ) -def test_to_mdio_writes_and_returns_mdio(tmp_path): +def test_to_mdio_writes_and_returns_mdio(tmp_path: Path) -> None: """Test that to_mdio writes and returns an mdio.Dataset.""" ds_in = build_toy_dataset() store_path = tmp_path / "toy.mdio" diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index 88edc3a6..80c4dbb4 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -1,6 +1,7 @@ """Unit tests for MDIO v1 schema builder.""" from datetime import datetime +from pathlib import Path import pytest @@ -13,7 +14,7 @@ from mdio.schemas.v1.dataset import Dataset -def test_builder_initialization(): +def test_builder_initialization() -> None: """Test basic builder initialization.""" builder = MDIODatasetBuilder("test_dataset") assert builder.name == "test_dataset" @@ -25,17 +26,17 @@ def test_builder_initialization(): assert builder._state == _BuilderState.INITIAL -def test_dimension_builder_state(): +def test_dimension_builder_state() -> None: """Test dimension builder state transitions and functionality.""" builder = MDIODatasetBuilder("test_dataset") # First dimension should change state to HAS_DIMENSIONS and create a variable 
builder = builder.add_dimension("x", 100, long_name="X Dimension") assert builder._state == _BuilderState.HAS_DIMENSIONS - assert len(builder._dimensions) == 1 - assert len(builder._variables) == 1 + assert len(builder._dimensions) == 1 # noqa: PLR2004 + assert len(builder._variables) == 1 # noqa: PLR2004 assert builder._dimensions[0].name == "x" - assert builder._dimensions[0].size == 100 + assert builder._dimensions[0].size == 100 # noqa: PLR2004 assert builder._variables[0].name == "x" assert builder._variables[0].long_name == "X Dimension" assert builder._variables[0].data_type == ScalarType.INT32 @@ -44,16 +45,16 @@ def test_dimension_builder_state(): # Adding another dimension should maintain state and create another variable builder = builder.add_dimension("y", 200, data_type=ScalarType.UINT32) assert builder._state == _BuilderState.HAS_DIMENSIONS - assert len(builder._dimensions) == 2 - assert len(builder._variables) == 2 + assert len(builder._dimensions) == 2 # noqa: PLR2004 + assert len(builder._variables) == 2 # noqa: PLR2004 assert builder._dimensions[1].name == "y" - assert builder._dimensions[1].size == 200 + assert builder._dimensions[1].size == 200 # noqa: PLR2004 assert builder._variables[1].name == "y" assert builder._variables[1].data_type == ScalarType.UINT32 assert builder._variables[1].dimensions[0].name == "y" -def test_dimension_with_metadata(): +def test_dimension_with_metadata() -> None: """Test adding dimensions with custom metadata.""" builder = MDIODatasetBuilder("test_dataset") @@ -72,7 +73,7 @@ def test_dimension_with_metadata(): assert depth_var.metadata.units_v1.length == "m" -def test_coordinate_builder_state(): +def test_coordinate_builder_state() -> None: """Test coordinate builder state transitions and functionality.""" builder = MDIODatasetBuilder("test_dataset") @@ -89,7 +90,7 @@ def test_coordinate_builder_state(): # Adding coordinate should change state to HAS_COORDINATES builder = builder.add_coordinate("x_coord", 
dimensions=["x"], long_name="X Coordinate") assert builder._state == _BuilderState.HAS_COORDINATES - assert len(builder._coordinates) == 1 + assert len(builder._coordinates) == 1 # noqa: PLR2004 assert builder._coordinates[0].name == "x_coord" assert builder._coordinates[0].long_name == "X Coordinate" assert builder._coordinates[0].dimensions[0].name == "x" @@ -97,12 +98,12 @@ def test_coordinate_builder_state(): # Adding another coordinate should maintain state builder = builder.add_coordinate("y_coord", dimensions=["y"]) assert builder._state == _BuilderState.HAS_COORDINATES - assert len(builder._coordinates) == 2 + assert len(builder._coordinates) == 2 # noqa: PLR2004 assert builder._coordinates[1].name == "y_coord" assert builder._coordinates[1].dimensions[0].name == "y" -def test_variable_builder_state(): +def test_variable_builder_state() -> None: """Test variable builder state transitions and functionality.""" builder = MDIODatasetBuilder("test_dataset") @@ -116,7 +117,8 @@ def test_variable_builder_state(): # Adding variable should change state to HAS_VARIABLES builder = builder.add_variable("data", dimensions=["x"], long_name="Data Variable") assert builder._state == _BuilderState.HAS_VARIABLES - assert len(builder._variables) == 2 # One for dimension, one for variable + # One for dimension, one for variable + assert len(builder._variables) == 2 # noqa: PLR2004 assert builder._variables[1].name == "data" assert builder._variables[1].long_name == "Data Variable" assert builder._variables[1].dimensions[0].name == "x" @@ -124,12 +126,13 @@ def test_variable_builder_state(): # Adding another variable should maintain state builder = builder.add_variable("data2", dimensions=["x"]) assert builder._state == _BuilderState.HAS_VARIABLES - assert len(builder._variables) == 3 # One for dimension, two for variables + # One for dimension, two for variables + assert len(builder._variables) == 3 # noqa: PLR2004 assert builder._variables[2].name == "data2" assert 
builder._variables[2].dimensions[0].name == "x" -def test_build_dataset(): +def test_build_dataset() -> None: """Test building a complete dataset.""" dataset = ( MDIODatasetBuilder("test_dataset") @@ -144,15 +147,15 @@ def test_build_dataset(): assert isinstance(dataset, Dataset) assert dataset.metadata.name == "test_dataset" # Two dimension variables + one data variable + two coordinate variables - assert len(dataset.variables) == 5 + assert len(dataset.variables) == 5 # noqa: PLR2004 assert dataset.variables[0].name == "x" assert dataset.variables[1].name == "y" assert dataset.variables[2].name == "data" assert dataset.variables[2].long_name == "Test Data" - assert len(dataset.variables[2].dimensions) == 2 + assert len(dataset.variables[2].dimensions) == 2 # noqa: PLR2004 -def test_auto_naming(): +def test_auto_naming() -> None: """Test automatic naming of coordinates and variables.""" dataset = ( MDIODatasetBuilder("test_dataset") @@ -169,7 +172,7 @@ def test_auto_naming(): assert dataset.variables[2].name == "var_1" -def test_default_dimensions(): +def test_default_dimensions() -> None: """Test that coordinates and variables use all dimensions by default.""" dataset = ( MDIODatasetBuilder("test_dataset") @@ -181,14 +184,14 @@ def test_default_dimensions(): ) # Two dimension variables + one data variable + one coordinate variable - assert len(dataset.variables) == 4 + assert len(dataset.variables) == 4 # noqa: PLR2004 assert dataset.variables[2].name == "var_0" - assert len(dataset.variables[2].dimensions) == 2 + assert len(dataset.variables[2].dimensions) == 2 # noqa: PLR2004 assert dataset.variables[2].dimensions[0].name == "x" assert dataset.variables[2].dimensions[1].name == "y" -def test_build_order_enforcement(): +def test_build_order_enforcement() -> None: """Test that the builder enforces the correct build order.""" builder = MDIODatasetBuilder("test_dataset") @@ -207,7 +210,7 @@ def test_build_order_enforcement(): builder.build() -def 
test_toy_example(tmp_path): +def test_toy_example(tmp_path: Path) -> None: """Test building a toy dataset with multiple variables and attributes.""" dataset = ( MDIODatasetBuilder( @@ -319,11 +322,11 @@ def test_toy_example(tmp_path): assert dataset.metadata.name == "campos_3d" assert dataset.metadata.api_version == "1.0.0" assert dataset.metadata.attributes["foo"] == "bar" - assert len(dataset.metadata.attributes["textHeader"]) == 3 + assert len(dataset.metadata.attributes["textHeader"]) == 3 # noqa: PLR2004 # Verify variables (including dimension variables) # 3 dimension variables + 4 data variables + 2 coordinate variables - assert len(dataset.variables) == 9 + assert len(dataset.variables) == 9 # noqa: PLR2004 # Verify dimension variables inline_var = next(v for v in dataset.variables if v.name == "inline") @@ -340,7 +343,7 @@ def test_toy_example(tmp_path): assert image.data_type == ScalarType.FLOAT32 assert isinstance(image.compressor, Blosc) assert image.compressor.algorithm == "zstd" - assert image.metadata.stats_v1.count == 100 + assert image.metadata.stats_v1.count == 100 # noqa: PLR2004 # Verify velocity variable velocity = next(v for v in dataset.variables if v.name == "velocity") @@ -357,5 +360,5 @@ def test_toy_example(tmp_path): # Verify image_headers variable headers = next(v for v in dataset.variables if v.name == "image_headers") assert isinstance(headers.data_type, StructuredType) - assert len(headers.data_type.fields) == 4 + assert len(headers.data_type.fields) == 4 # noqa: PLR2004 assert headers.data_type.fields[0].name == "cdp-x" diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index effb5773..6f57eb8c 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -1,6 +1,6 @@ """Unit tests for MDIO v1 factory.""" -# TODO(BrianMichell): Update this to use canonical factory functions. +# TODO(BrianMichell, #535): Update this to use canonical factory functions. 
from datetime import UTC from datetime import datetime @@ -21,7 +21,7 @@ from mdio.schemas.dtype import StructuredType -def test_make_toy_dataset(): +def test_make_toy_dataset() -> None: """Test that make_toy_dataset returns a Dataset object using the factory pattern.""" # Create dataset using factory template = SCHEMA_TEMPLATE_MAP[MDIOSchemaType.SEISMIC_3D_POST_STACK_GENERIC] @@ -66,8 +66,8 @@ def test_make_toy_dataset(): "foo": "bar", } - # Verify variables - assert len(ds.variables) == 8 # seismic, headers, trace_mask, cdp-x, cdp-y + # Verify variables, coordinates, and dimensions + assert len(ds.variables) == 8 # noqa: PLR2004 # Find seismic variable seismic = next(v for v in ds.variables if v.name == "seismic") @@ -80,7 +80,7 @@ def test_make_toy_dataset(): # Find headers variable headers = next(v for v in ds.variables if v.name == "headers") assert isinstance(headers.data_type, StructuredType) - assert len(headers.data_type.fields) == 4 + assert len(headers.data_type.fields) == 4 # noqa: PLR2004 assert headers.dimensions[0].name == "inline" assert headers.dimensions[1].name == "crossline" assert headers.compressor == Blosc(name="blosc") @@ -106,7 +106,7 @@ def test_make_toy_dataset(): assert cdp_y.metadata.units_v1.length == "m" -def test_named_dimension_invalid_size(): +def test_named_dimension_invalid_size() -> None: """Test that make_named_dimension raises a ValidationError for invalid size.""" with pytest.raises(ValidationError): make_named_dimension("dim", 0) @@ -114,7 +114,7 @@ def test_named_dimension_invalid_size(): make_named_dimension("dim", -1) -def test_make_coordinate_invalid_types(): +def test_make_coordinate_invalid_types() -> None: """Test that make_coordinate raises a ValidationError for invalid types.""" # dimensions must be a list of NamedDimension or str with pytest.raises(ValidationError): @@ -124,7 +124,7 @@ def test_make_coordinate_invalid_types(): make_coordinate(name="coord", dimensions=["x"], data_type="notatype") -def 
test_make_variable_invalid_args(): +def test_make_variable_invalid_args() -> None: """Test that make_variable raises a ValidationError for invalid types.""" # compressor must be Blosc, ZFP or None with pytest.raises(ValidationError): @@ -145,14 +145,14 @@ def test_make_variable_invalid_args(): ) -def test_make_dataset_metadata_invalid_created_on(): +def test_make_dataset_metadata_invalid_created_on() -> None: """Test that make_dataset_metadata raises a ValidationError for invalid created_on.""" # created_on must be an aware datetime with pytest.raises(ValidationError): make_dataset_metadata(name="ds", api_version="1", created_on="not-a-date") -def test_make_dataset_invalid_variables_and_metadata_types(): +def test_make_dataset_invalid_variables_and_metadata_types() -> None: """Test that make_dataset raises a ValidationError.""" ts = datetime.now(UTC) meta = make_dataset_metadata(name="ds", api_version="1", created_on=ts) From 433d9083b8a54b16f79d78936ecc9398dfb03a17 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 27 May 2025 13:37:26 +0000 Subject: [PATCH 28/55] Fix todo linting error --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 759886c8..421cf931 100644 --- a/noxfile.py +++ b/noxfile.py @@ -200,7 +200,7 @@ def tests(session: Session) -> None: "pygments", "pytest-dependency", "s3fs", - "zfpy", # TODO(BrianMichell): Ensure this is pulling from the pyproject.toml + "zfpy", # TODO(BrianMichell): #0 Ensure this is pulling from the pyproject.toml ], ) From 046a73cd5c9d5a03002313b45e3a91511a8f1143 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 27 May 2025 15:19:58 +0000 Subject: [PATCH 29/55] Fix test schema from snake to camel case --- tests/unit/test_schema.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 0b2a9f54..901c4653 100644 --- a/tests/unit/test_schema.py +++ 
b/tests/unit/test_schema.py @@ -5,43 +5,43 @@ TEST_SCHEMA = { "metadata": { "name": "test_dataset", - "api_version": "1.0.0", - "created_on": "2023-01-01T00:00:00Z", + "apiVersion": "1.0.0", + "createdOn": "2023-01-01T00:00:00Z", }, "variables": [ { "name": "actual_variable", - "data_type": "float32", + "dataType": "float32", "dimensions": ["dim0", "dim1"], "compressor": {"name": "blosc", "level": 3}, "coordinates": ["coord"], "metadata": { - "chunk_grid": { + "chunkGrid": { "name": "regular", - "configuration": {"chunk_shape": [10, 20]}, + "configuration": {"chunkShape": [10, 20]}, }, }, }, { "name": "coord", - "data_type": "float32", + "dataType": "float32", "dimensions": ["dim0", "dim1"], "metadata": { - "chunk_grid": { + "chunkGrid": { "name": "regular", - "configuration": {"chunk_shape": [10, 20]}, + "configuration": {"chunkShape": [10, 20]}, }, - "units_v1": {"length": "m"}, + "unitsV1": {"length": "m"}, }, }, { "name": "dim0", - "data_type": "int32", + "dataType": "int32", "dimensions": [{"name": "dim0", "size": 100}], }, { "name": "dim1", - "data_type": "int32", + "dataType": "int32", "dimensions": [{"name": "dim1", "size": 200}], }, ], From 8fa4b682470f825dda1081eb75248f8508c68948 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 27 May 2025 15:20:21 +0000 Subject: [PATCH 30/55] Add JSON serialization test --- tests/unit/test_schema.py | 252 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 901c4653..33ed24f4 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1,5 +1,12 @@ """Test the schema for the v1 dataset.""" +import copy +import json +from pathlib import Path + +import pytest +from pydantic import ValidationError + from mdio.schemas.v1 import Dataset as V1Dataset TEST_SCHEMA = { @@ -51,3 +58,248 @@ def test_dataset_schema_validation() -> None: """Test that the dataset schema validates correctly.""" 
V1Dataset.model_validate(TEST_SCHEMA) + + +class TestV1DatasetJSONSerialization: + """Test JSON serialization capabilities of V1Dataset using Pydantic methods.""" + + @pytest.fixture + def sample_dataset(self) -> V1Dataset: + """Create a sample V1Dataset for testing.""" + # Use a deep copy to avoid test interference + return V1Dataset.model_validate(copy.deepcopy(TEST_SCHEMA)) + + def test_model_dump_json_default_camel_case(self, sample_dataset: V1Dataset) -> None: + """Test that JSON serialization uses camelCase by default.""" + json_str = sample_dataset.model_dump_json(by_alias=True) + + print(json_str) + + # Should be valid JSON + parsed = json.loads(json_str) + assert isinstance(parsed, dict) + + # Should contain expected top-level keys + assert "metadata" in parsed + assert "variables" in parsed + + # Metadata should have expected fields + assert parsed["metadata"]["name"] == "test_dataset" + assert parsed["metadata"]["apiVersion"] == "1.0.0" + assert parsed["metadata"]["createdOn"] == "2023-01-01T00:00:00Z" + + # Should have 4 variables + assert len(parsed["variables"]) == 4 # noqa: PLR2004 + + def test_model_dump_json_exclude_none(self, sample_dataset: V1Dataset) -> None: + """Test JSON serialization excluding None values.""" + json_str = sample_dataset.model_dump_json(exclude_none=True) + parsed = json.loads(json_str) # noqa: F841 + + # Should not contain null values in the JSON + json_str_lower = json_str.lower() + assert "null" not in json_str_lower + + def test_model_validate_json_basic(self) -> None: + """Test basic JSON deserialization using model_validate_json.""" + json_str = json.dumps(TEST_SCHEMA) + dataset = V1Dataset.model_validate_json(json_str) + + assert dataset.metadata.name == "test_dataset" + assert dataset.metadata.api_version == "1.0.0" + assert len(dataset.variables) == 4 # noqa: PLR2004 + + # Check first variable + var = dataset.variables[0] + assert var.name == "actual_variable" + assert var.data_type.value == "float32" + assert 
var.dimensions == ["dim0", "dim1"] + + def test_model_validate_json_invalid(self) -> None: + """Test JSON deserialization with invalid data.""" + invalid_json = '{"metadata": {"name": "test"}, "variables": []}' + + with pytest.raises(ValidationError) as exc_info: + V1Dataset.model_validate_json(invalid_json) + + # Should have validation errors + errors = exc_info.value.errors() + assert len(errors) > 0 + + def test_model_validate_json_malformed(self) -> None: + """Test JSON deserialization with malformed JSON.""" + malformed_json = '{"metadata": {"name": "test"' # Missing closing braces + + with pytest.raises(ValidationError): + V1Dataset.model_validate_json(malformed_json) + + def test_json_schema_generation(self) -> None: + """Test JSON schema generation using model_json_schema.""" + schema = V1Dataset.model_json_schema() + + # Should be a valid JSON schema + assert isinstance(schema, dict) + assert schema["type"] == "object" + assert "properties" in schema + + # Should have metadata and variables properties + properties = schema["properties"] + assert "metadata" in properties + assert "variables" in properties + + # Should have required fields + assert "required" in schema + required = schema["required"] + assert "metadata" in required + assert "variables" in required + + def test_json_schema_with_mode(self) -> None: + """Test JSON schema generation with different modes.""" + # Test validation mode (default) + validation_schema = V1Dataset.model_json_schema(mode="validation") + assert "properties" in validation_schema + + # Test serialization mode + serialization_schema = V1Dataset.model_json_schema(mode="serialization") + assert "properties" in serialization_schema + + def test_round_trip_consistency_default(self, sample_dataset: V1Dataset) -> None: + """Test that serialization -> deserialization preserves data (default camelCase).""" + # Export to JSON (default camelCase) + json_str = sample_dataset.model_dump_json() + + # Import from JSON + restored_dataset = 
V1Dataset.model_validate_json(json_str) + + # Export again + json_str2 = restored_dataset.model_dump_json() + + # Should be identical + assert json_str == json_str2 + + # Key properties should match + assert sample_dataset.metadata.name == restored_dataset.metadata.name + assert sample_dataset.metadata.api_version == restored_dataset.metadata.api_version + assert len(sample_dataset.variables) == len(restored_dataset.variables) + + # Variables should match + for orig_var, restored_var in zip( + sample_dataset.variables, restored_dataset.variables, strict=False + ): + assert orig_var.name == restored_var.name + assert orig_var.data_type == restored_var.data_type + assert orig_var.dimensions == restored_var.dimensions + + def test_round_trip_with_aliases(self, sample_dataset: V1Dataset) -> None: + """Test round-trip consistency when using aliases.""" + # Export with aliases (should be default now) + json_str = sample_dataset.model_dump_json() + + # Import (should handle aliases automatically) + restored_dataset = V1Dataset.model_validate_json(json_str) + + # Should preserve data + assert sample_dataset.metadata.name == restored_dataset.metadata.name + assert len(sample_dataset.variables) == len(restored_dataset.variables) + + def test_json_file_operations(self, sample_dataset: V1Dataset, tmp_path: Path) -> None: + """Test JSON serialization to/from files.""" + json_file = tmp_path / "test_dataset.json" + + # Write to file (using default camelCase) + json_str = sample_dataset.model_dump_json(indent=2) + json_file.write_text(json_str, encoding="utf-8") + + # Verify file exists and has content + assert json_file.exists() + assert json_file.stat().st_size > 0 + + # Read from file + file_content = json_file.read_text(encoding="utf-8") + restored_dataset = V1Dataset.model_validate_json(file_content) + + # Should match original + assert sample_dataset.metadata.name == restored_dataset.metadata.name + assert len(sample_dataset.variables) == len(restored_dataset.variables) + + 
def test_json_validation_without_instantiation(self) -> None: + """Test JSON validation without creating a dataset instance.""" + valid_json = json.dumps(TEST_SCHEMA) + + # This should not raise an exception + try: + V1Dataset.model_validate_json(valid_json) + validation_passed = True + except ValidationError: + validation_passed = False + + assert validation_passed + + def test_partial_json_validation(self) -> None: + """Test validation of partial/incomplete JSON data.""" + # Missing required fields + incomplete_schema = { + "metadata": { + "name": "test_dataset", + # Missing apiVersion and createdOn + }, + "variables": [], + } + + with pytest.raises(ValidationError) as exc_info: + V1Dataset.model_validate_json(json.dumps(incomplete_schema)) + + errors = exc_info.value.errors() + # Should have errors for missing required fields + error_fields = {error["loc"][-1] for error in errors} + assert "apiVersion" in error_fields or "api_version" in error_fields + + def test_json_with_extra_fields(self) -> None: + """Test JSON deserialization with extra fields.""" + # Use a copy to avoid modifying the global TEST_SCHEMA + schema_with_extra = copy.deepcopy(TEST_SCHEMA) + schema_with_extra["extra_field"] = "should_be_ignored" + schema_with_extra["metadata"]["extra_metadata"] = "also_ignored" + + # Should raise ValidationError because extra fields are forbidden + with pytest.raises(ValidationError) as exc_info: + V1Dataset.model_validate_json(json.dumps(schema_with_extra)) + + # Should have error about extra fields + errors = exc_info.value.errors() + assert any("extra_forbidden" in str(error) for error in errors) + + def test_json_schema_contains_examples(self) -> None: + """Test that generated JSON schema contains useful information.""" + schema = V1Dataset.model_json_schema() + + # Should have descriptions for properties + properties = schema.get("properties", {}) + if "metadata" in properties: + # Check if metadata has some schema information + metadata_schema = 
properties["metadata"] + assert isinstance(metadata_schema, dict) + + if "variables" in properties: + # Check if variables has some schema information + variables_schema = properties["variables"] + assert isinstance(variables_schema, dict) + assert variables_schema.get("type") == "array" + + def test_json_serialization_performance(self, sample_dataset: V1Dataset) -> None: + """Test that JSON serialization is reasonably performant.""" + import time + + # Time multiple serializations + start_time = time.time() + for _ in range(100): + json_str = sample_dataset.model_dump_json() + end_time = time.time() + + # Should complete 100 serializations in reasonable time (< 1 second) + elapsed = end_time - start_time + assert elapsed < 1.0 + + # Verify the JSON is still valid + parsed = json.loads(json_str) + assert parsed["metadata"]["name"] == "test_dataset" From 26d2db5aa1edbef8e39dc97b18356b30af881f1c Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 27 May 2025 16:39:29 +0000 Subject: [PATCH 31/55] Ensure zarr v2 kwarg present and correct --- src/mdio/core/v1/_overloads.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py index af2856b8..28601c31 100644 --- a/src/mdio/core/v1/_overloads.py +++ b/src/mdio/core/v1/_overloads.py @@ -25,6 +25,11 @@ def to_mdio( ) -> None: """Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store.""" print("👋 hello world from mdio.to_mdio!") + # Ensure zarr_version=2 by default unless explicitly overridden + zarr_version = kwargs.get("zarr_version", 2) + if zarr_version != 2: + raise ValueError("MDIO only supports zarr_version=2") + kwargs["zarr_version"] = zarr_version return super().to_zarr(*args, store=store, **kwargs) @@ -41,6 +46,11 @@ def to_mdio( ) -> None: """Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store.""" print("👋 hello world from mdio.to_mdio!") + # Ensure zarr_version=2 by default unless explicitly overridden + 
zarr_version = kwargs.get("zarr_version", 2) + if zarr_version != 2: + raise ValueError("MDIO only supports zarr_version=2") + kwargs["zarr_version"] = zarr_version return super().to_zarr(*args, store=store, **kwargs) From 17e4029234078017cb3a5c71905bdb222328d650 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 27 May 2025 16:39:38 +0000 Subject: [PATCH 32/55] Fix iterator --- src/mdio/core/v1/_overloads.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py index 28601c31..e7bcd0fc 100644 --- a/src/mdio/core/v1/_overloads.py +++ b/src/mdio/core/v1/_overloads.py @@ -83,9 +83,10 @@ def open( # Cast Dataset to MDIODataset ds.__class__ = MDIODataset # Cast each DataArray in data_vars and coords - for _name, var in ds.data_vars.values(): + + for _name, var in ds.data_vars.items(): var.__class__ = MDIODataArray - for _name, coord in ds.coords.values(): + for _name, coord in ds.coords.items(): coord.__class__ = MDIODataArray return ds From d83abbd3b3d1622caaf2469a9096f2054950cd1a Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Tue, 27 May 2025 21:04:54 +0000 Subject: [PATCH 33/55] Resolve serialization issues to/from JSON --- src/mdio/core/v1/_serializer.py | 74 ++++----- src/mdio/schemas/core.py | 79 +++++++--- tests/unit/test_schema.py | 270 ++++++++++++++++++++++++++++++++ 3 files changed, 366 insertions(+), 57 deletions(-) diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index 3a3a2e8e..271f1b57 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -23,6 +23,8 @@ from mdio.schemas.v1.variable import Coordinate from mdio.schemas.v1.variable import Variable from mdio.schemas.v1.variable import VariableMetadata +from mdio.schemas.chunk_grid import * +from mdio.schemas.v1.stats import * try: import zfpy as zfpy_base # Base library @@ -46,13 +48,14 @@ def make_coordinate( metadata: list[AllUnits | UserAttributes] | None = 
None, ) -> Coordinate: """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" - return Coordinate( - name=name, - long_name=long_name, - dimensions=dimensions, - data_type=data_type, - metadata=metadata, - ) + coordinate_dict = { + "name": name, + "longName": long_name, + "dimensions": dimensions, + "dataType": data_type, + "metadata": metadata, + } + return Coordinate(**coordinate_dict) def make_variable( # noqa: PLR0913 PLR0912 @@ -81,52 +84,48 @@ def make_variable( # noqa: PLR0913 PLR0912 Raises: TypeError: If the metadata type is not supported. """ - # Convert metadata to VariableMetadata if needed + + # TODO(BrianMichell) #0: I suspect that this is only partially correct... + + def _to_serializable(val: Any) -> Any: + return val.model_dump(mode="json", by_alias=True) if hasattr(val, "model_dump") else val + var_metadata = None if metadata: if isinstance(metadata, list): - # Convert list of metadata to dict metadata_dict = {} for md in metadata: if isinstance(md, AllUnits): - # For units_v1, if it's a single element, use it directly - if isinstance(md.units_v1, list) and len(md.units_v1) == 1: - metadata_dict["units_v1"] = md.units_v1[0] - else: - metadata_dict["units_v1"] = md.units_v1 + val = md.units_v1 + if isinstance(val, list) and len(val) == 1: + val = val[0] + metadata_dict["unitsV1"] = val elif isinstance(md, UserAttributes): - # For attributes, if it's a single element, use it directly - attrs = md.model_dump(by_alias=True) - if isinstance(attrs, list) and len(attrs) == 1: - metadata_dict["attributes"] = attrs[0] - else: - metadata_dict["attributes"] = attrs + attrs = _to_serializable(md) + metadata_dict["attributes"] = attrs[0] if isinstance(attrs, list) and len(attrs) == 1 else attrs var_metadata = VariableMetadata(**metadata_dict) + elif isinstance(metadata, dict): - # Convert camelCase keys to snake_case for VariableMetadata converted_dict = {} for key, value in metadata.items(): if key == "unitsV1": - # For units_v1, 
if it's a single element array, use the element directly - if isinstance(value, list) and len(value) == 1: - converted_dict["units_v1"] = value[0] - else: - converted_dict["units_v1"] = value + val = value[0] if isinstance(value, list) and len(value) == 1 else value + converted_dict["unitsV1"] = _to_serializable(val) else: converted_dict[key] = value var_metadata = VariableMetadata(**converted_dict) + elif isinstance(metadata, VariableMetadata): var_metadata = metadata + else: - msg = f"Unsupported metadata type: {type(metadata)}" - raise TypeError(msg) + raise TypeError(f"Unsupported metadata type: {type(metadata)}") - # Create the variable with all attributes explicitly set return Variable( name=name, - long_name=long_name, + longName=long_name, dimensions=dimensions, - data_type=data_type, + dataType=data_type, compressor=compressor, coordinates=coordinates, metadata=var_metadata, @@ -140,12 +139,13 @@ def make_dataset_metadata( attributes: dict[str, Any] | None = None, ) -> DatasetMetadata: """Create a DatasetMetadata with name, api_version, created_on, and optional attributes.""" - return DatasetMetadata( - name=name, - api_version=api_version, - created_on=created_on, - attributes=attributes, - ) + dataset_metadata_dict = { + "name": name, + "apiVersion": api_version, + "createdOn": created_on, + "attributes": attributes, + } + return DatasetMetadata(**dataset_metadata_dict) def make_dataset( diff --git a/src/mdio/schemas/core.py b/src/mdio/schemas/core.py index 34a09066..4bbdc6fb 100644 --- a/src/mdio/schemas/core.py +++ b/src/mdio/schemas/core.py @@ -8,32 +8,56 @@ from pydantic import BaseModel from pydantic import ConfigDict from pydantic.alias_generators import to_camel +from pydantic import Field -def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: - """Extract Pydantic BaseModel fields. +# def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: +# """Extract Pydantic BaseModel fields. 
- Args: - model: (Type) The model object for which the fields will be extracted. +# Args: +# model: (Type) The model object for which the fields will be extracted. - Returns: - A dictionary containing the fields of the model along with - their corresponding types and default values. +# Returns: +# A dictionary containing the fields of the model along with +# their corresponding types and default values. - Example: - >>> class MyModel(BaseModel): - ... name: str - ... age: int = 0 - ... - >>> model_fields(MyModel) - {'name': (str, ), 'age': (int, 0)} - """ - annotations = get_type_hints(model) +# Example: +# >>> class MyModel(BaseModel): +# ... name: str +# ... age: int = 0 +# ... +# >>> model_fields(MyModel) +# {'name': (str, ), 'age': (int, 0)} +# """ +# annotations = get_type_hints(model) - fields = {} - for field_name, field in model.model_fields.items(): - fields[field_name] = (annotations[field_name], field) +# fields = {} +# for field_name, field in model.model_fields.items(): +# fields[field_name] = (annotations[field_name], field) + +# return fields +# def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: +# """Return fields suitable for use in create_model with correct types and defaults.""" +# fields = {} +# for field_name, field_info in model.model_fields.items(): +# annotated_type = field_info.annotation +# default = field_info.default if field_info.default is not None else ... +# fields[field_name] = (annotated_type, Field(default, description=field_info.description)) +# return fields + +def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: + """Safely extract fields for create_model, preserving optionality and default behavior.""" + fields = {} + for field_name, field_info in model.model_fields.items(): + annotated_type = field_info.annotation + if field_info.is_required(): + fields[field_name] = (annotated_type, ...) 
+ else: + fields[field_name] = ( + annotated_type, + Field(field_info.default, description=field_info.description), + ) return fields @@ -46,4 +70,19 @@ class StrictModel(BaseModel): class CamelCaseStrictModel(StrictModel): """A model with forbidden extras and camel case aliases.""" - model_config = ConfigDict(alias_generator=to_camel) + model_config = ConfigDict( + extra="forbid", + populate_by_name=False, + alias_generator=to_camel, + ser_json_by_alias=True, + ) + + def model_dump_json(self, *args, **kwargs): # type: ignore[override] + """Dump JSON using camelCase aliases and excluding None values by default.""" + # Ensure camelCase aliases + if "by_alias" not in kwargs: + kwargs["by_alias"] = True + # Exclude None fields to avoid nulls in output + if "exclude_none" not in kwargs: + kwargs["exclude_none"] = True + return super().model_dump_json(*args, **kwargs) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 33ed24f4..012e3099 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -303,3 +303,273 @@ def test_json_serialization_performance(self, sample_dataset: V1Dataset) -> None # Verify the JSON is still valid parsed = json.loads(json_str) assert parsed["metadata"]["name"] == "test_dataset" + + +class TestPydanticMDIORoundTrip: + """Test round-trip conversions between JSON and MDIO datasets using to_mdio.""" + + def test_json_to_mdio_dataset(self, tmp_path: Path) -> None: + """Test converting TEST_SCHEMA JSON to an MDIO dataset using to_mdio.""" + from mdio.core.v1._serializer import _construct_mdio_dataset + + output_path = tmp_path / "from_json.mdio" + # output_path = "test_mdio_from_json.mdio" + + # Step 1: Validate the TEST_SCHEMA JSON with Pydantic + dataset = V1Dataset.model_validate(TEST_SCHEMA) + + # Step 2: Convert to MDIO dataset using the internal constructor + mdio_dataset = _construct_mdio_dataset(dataset) + + # Step 3: Use to_mdio to save the dataset + mdio_dataset.to_mdio(store=str(output_path)) + + # 
Verify the dataset was created + assert output_path.exists() + + # Verify we can read it back + from mdio.core.v1 import mdio + with mdio.open(str(output_path)) as reader: + assert "actual_variable" in reader + assert "coord" in reader + assert "dim0" in reader.coords + assert "dim1" in reader.coords + assert reader.attrs["name"] == "test_dataset" + + def test_mdio_dataset_to_json(self, tmp_path: Path) -> None: + """Test converting an MDIO dataset back to JSON (camelCase).""" + from mdio.core.v1._serializer import _construct_mdio_dataset + from mdio.core.v1 import mdio + + # Step 1: Create MDIO dataset from TEST_SCHEMA + dataset = V1Dataset.model_validate(TEST_SCHEMA) + mdio_dataset = _construct_mdio_dataset(dataset) + + mdio_path = tmp_path / "test_dataset.mdio" + mdio_dataset.to_mdio(store=str(mdio_path)) + + # Step 2: Read back the MDIO dataset + with mdio.open(str(mdio_path)) as reader: + # Step 3: Extract information to reconstruct Pydantic model + variables = [] + + # Add dimension variables + for dim_name in ["dim0", "dim1"]: + if dim_name in reader.coords: + coord = reader.coords[dim_name] + var_dict = { + "name": dim_name, + "dataType": str(coord.dtype), + "dimensions": [{"name": dim_name, "size": reader.dims[dim_name]}], + } + variables.append(var_dict) + + # Add data variables with their metadata + for var_name in reader.data_vars: + var = reader[var_name] + var_dict = { + "name": var_name, + "dataType": str(var.dtype), + "dimensions": list(var.dims), + } + + # Reconstruct metadata based on original TEST_SCHEMA + if var_name == "coord": + var_dict["metadata"] = { + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [10, 20]}, + }, + "unitsV1": {"length": "m"}, + } + elif var_name == "actual_variable": + var_dict["compressor"] = {"name": "blosc", "level": 3} + var_dict["coordinates"] = ["coord"] + var_dict["metadata"] = { + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [10, 20]}, + }, + } + 
variables.append(var_dict) + + # Step 4: Create Pydantic model data (camelCase) + dataset_data = { + "metadata": { + "name": reader.attrs.get("name"), + "apiVersion": reader.attrs.get("apiVersion", "1.0.0"), + "createdOn": reader.attrs.get("createdOn", "2023-01-01T00:00:00Z"), + }, + "variables": variables + } + + # Step 5: Validate with Pydantic and serialize to JSON using by_alias=True + pydantic_dataset = V1Dataset.model_validate(dataset_data) + json_str = pydantic_dataset.model_dump_json(by_alias=True) + + # Verify it's valid JSON and camelCase + parsed = json.loads(json_str) + + print(parsed) + + assert "apiVersion" in parsed["metadata"] + assert "createdOn" in parsed["metadata"] + assert "dataType" in parsed["variables"][0] + + # Verify the conversion preserved data + assert pydantic_dataset.metadata.name == "test_dataset" + + def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None: + """Test full round-trip: TEST_SCHEMA JSON -> MDIO -> JSON using to_mdio.""" + from mdio.core.v1._serializer import _construct_mdio_dataset + from mdio.core.v1 import mdio + + # Step 1: Start with TEST_SCHEMA (input JSON) + original_dataset = V1Dataset.model_validate(TEST_SCHEMA) + original_json = original_dataset.model_dump_json(by_alias=True) + original_parsed = json.loads(original_json) + + # Verify original is camelCase + assert "apiVersion" in original_parsed["metadata"] + assert "createdOn" in original_parsed["metadata"] + + # Step 2: Convert to MDIO dataset and save + mdio_dataset = _construct_mdio_dataset(original_dataset) + mdio_path = tmp_path / "round_trip.mdio" + mdio_dataset.to_mdio(store=str(mdio_path)) + + # Step 3: Read back from MDIO and convert to JSON + with mdio.open(str(mdio_path)) as reader: + # Reconstruct the schema structure + variables = [] + + # Add dimension variables + for dim_name in ["dim0", "dim1"]: + if dim_name in reader.coords: + coord = reader.coords[dim_name] + var_dict = { + "name": dim_name, + "dataType": str(coord.dtype), + 
"dimensions": [{"name": dim_name, "size": reader.dims[dim_name]}], + } + variables.append(var_dict) + + # Add coordinate variables that are not dimensions + for coord_name, coord in reader.coords.items(): + if coord_name not in ["dim0", "dim1"]: # Skip dimension coordinates + var_dict = { + "name": coord_name, + "dataType": str(coord.dtype), + "dimensions": list(coord.dims), + } + + # Add metadata for coord variable from original TEST_SCHEMA + if coord_name == "coord": + var_dict["metadata"] = { + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [10, 20]}, + }, + "unitsV1": {"length": "m"}, + } + variables.append(var_dict) + + # Add data variables with original metadata + for var_name in reader.data_vars: + var = reader[var_name] + var_dict = { + "name": var_name, + "dataType": str(var.dtype), + "dimensions": list(var.dims), + } + + # Add original metadata back from TEST_SCHEMA + if var_name == "actual_variable": + var_dict["compressor"] = {"name": "blosc", "level": 3} + var_dict["coordinates"] = ["coord"] + var_dict["metadata"] = { + "chunkGrid": { + "name": "regular", + "configuration": {"chunkShape": [10, 20]}, + }, + } + variables.append(var_dict) + + # Create final dataset + final_data = { + "metadata": { + "name": reader.attrs.get("name", "test_dataset"), + "apiVersion": reader.attrs.get("apiVersion", "1.0.0"), + "createdOn": reader.attrs.get("createdOn", "2023-01-01T00:00:00Z"), + }, + "variables": variables + } + + final_dataset = V1Dataset.model_validate(final_data) + final_json = final_dataset.model_dump_json(by_alias=True) + final_parsed = json.loads(final_json) + + # Step 4: Verify round-trip integrity + assert final_parsed["metadata"]["name"] == original_parsed["metadata"]["name"] + assert final_parsed["metadata"]["apiVersion"] == original_parsed["metadata"]["apiVersion"] + + # Verify camelCase is preserved + assert "apiVersion" in final_parsed["metadata"] + assert "createdOn" in final_parsed["metadata"] + assert "dataType" in 
final_parsed["variables"][0] + + # Verify variable structure is preserved + original_var_names = {v["name"] for v in original_parsed["variables"]} + final_var_names = {v["name"] for v in final_parsed["variables"]} + + print(original_var_names) + print("=================================") + print(final_var_names) + + assert original_var_names == final_var_names + + def test_invalid_snake_case_json_fails(self) -> None: + """Test that snake_case JSON fails validation (negative test).""" + # Create snake_case version of TEST_SCHEMA (should fail) + invalid_snake_case_schema = { + "metadata": { + "name": "test_dataset", + "api_version": "1.0.0", # snake_case should fail + "created_on": "2023-01-01T00:00:00Z", # snake_case should fail + }, + "variables": [ + { + "name": "test_var", + "data_type": "float32", # snake_case should fail + "dimensions": ["dim0"], + } + ] + } + + # This should fail validation + with pytest.raises(ValidationError): + V1Dataset.model_validate(invalid_snake_case_schema) + + def test_camel_case_serialization_only(self) -> None: + """Test that serialization only produces camelCase output.""" + dataset = V1Dataset.model_validate(TEST_SCHEMA) + json_str = dataset.model_dump_json() + parsed = json.loads(json_str) + + # Verify camelCase fields are present + assert "apiVersion" in parsed["metadata"] + assert "createdOn" in parsed["metadata"] + + # Verify snake_case fields are NOT present + assert "api_version" not in parsed["metadata"] + assert "created_on" not in parsed["metadata"] + + # Check variables use camelCase + for var in parsed["variables"]: + assert "dataType" in var + assert "data_type" not in var + + # Check nested metadata if present + if "metadata" in var and "chunkGrid" in var["metadata"]: + assert "chunkGrid" in var["metadata"] + assert "chunk_grid" not in var["metadata"] From afbd5fa53782b4d5fde3952af059c97e9bcd55dd Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 28 May 2025 14:29:24 +0000 Subject: [PATCH 34/55] Remove vestigial 
testing source file --- src/mdio/schemas/builder.py | 154 ------------------------------------ 1 file changed, 154 deletions(-) delete mode 100644 src/mdio/schemas/builder.py diff --git a/src/mdio/schemas/builder.py b/src/mdio/schemas/builder.py deleted file mode 100644 index 40908ff0..00000000 --- a/src/mdio/schemas/builder.py +++ /dev/null @@ -1,154 +0,0 @@ -"""Schema builders.""" - -from __future__ import annotations - -from typing import Any - -from mdio.schemas import NamedDimension -from mdio.schemas.v1.dataset import Dataset -from mdio.schemas.v1.dataset import DatasetMetadata -from mdio.schemas.v1.variable import Variable -from mdio.schemas.v1.variable import VariableMetadata - - -class VariableBuilder: - """Dataset builder.""" - - def __init__(self) -> None: - self.name = None - self.long_name = None - self.dtype = None - self.chunks = None - self.dims = None - self.coords = None - self.compressor = None - self.meta_dict = None - - def set_name(self, name: str) -> VariableBuilder: - """Set variable name.""" - self.name = name - return self - - def set_long_name(self, long_name: str) -> VariableBuilder: - """Add long, descriptive name to the variable.""" - self.long_name = long_name - return self - - def set_compressor(self, compressor: dict[str, Any]) -> VariableBuilder: - """Add long, descriptive name to the variable.""" - self.compressor = compressor - return self - - def add_dimension(self, *dimensions: str | dict[str, int]) -> VariableBuilder: - """Add a dimension to the dataset.""" - if self.dims is None: - self.dims = [] - - if isinstance(dimensions[0], str): - dims = list(dimensions) - elif isinstance(dimensions[0], dict): - dims = [ - NamedDimension(name=name, size=size) - for dim in dimensions - for name, size in dim.items() - ] - else: - raise NotImplementedError - - self.dims.extend(dims) - return self - - def add_coordinate(self, *names: str) -> VariableBuilder: - """Add a coordinate to the variable.""" - if self.coords is None: - self.coords = 
[] - - self.coords.extend(names) - return self - - def set_format(self, format_: str | dict[str, str]) -> VariableBuilder: - """Set variable format.""" - if isinstance(format_, dict): - fields = [{"name": n, "format": f} for n, f in format_.items()] - format_ = {"fields": fields} - - self.dtype = format_ - return self - - def set_chunks(self, chunks: list[int]) -> VariableBuilder: - """Set variable chunks.""" - if self.meta_dict is None: - self.meta_dict = {} - - self.meta_dict["chunkGrid"] = {"configuration": {"chunkShape": chunks}} - return self - - def set_units(self, units: dict[str, str]) -> VariableBuilder: - """Set variable units.""" - if self.meta_dict is None: - self.meta_dict = {} - - self.meta_dict["unitsV1"] = units - return self - - def add_attribute(self, key: str, value: Any) -> VariableBuilder: # noqa: ANN401 - """Add a user attribute to the variable metadata.""" - if self.meta_dict is None: - self.meta_dict = {} - - self.meta_dict["attributes"] = {key: value} - return self - - def build(self) -> Variable: - """Build the dataset model.""" - if self.chunks is not None and len(self.chunks) != len(self.dims): - msg = "Variable chunks must have same number of dimensions." 
- raise ValueError(msg) - - var_kwargs = {} - - if self.meta_dict is not None: - var_kwargs["metadata"] = VariableMetadata.model_validate(self.meta_dict) - - return Variable( - name=self.name, - long_name=self.long_name, - data_type=self.dtype, - dimensions=self.dims, - coordinates=self.coords, - compressor=self.compressor, - **var_kwargs, - ) - - -class DatasetBuilder: - """Dataset builder.""" - - def __init__(self) -> None: - self.variables = [] - self.name = None - self.metadata = None - - def set_name(self, name: str) -> DatasetBuilder: - """Set dataset name.""" - self.name = name - return self - - def add_variable(self, variable: Variable) -> DatasetBuilder: - """Add a variable to the dataset.""" - self.variables.append(variable) - return self - - def add_variables(self, variables: list[Variable]) -> DatasetBuilder: - """Add multiple variables to the dataset.""" - [self.add_variable(variable) for variable in variables] - return self - - def set_metadata(self, metadata: DatasetMetadata) -> DatasetBuilder: - """Add a metadata to the dataset.""" - self.metadata = metadata - return self - - def build(self) -> Dataset: - """Build the dataset model.""" - return Dataset(variables=self.variables, metadata=self.metadata) From 2845d13d27d794c075696aa82d9f6d74fc560320 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 28 May 2025 15:16:58 +0000 Subject: [PATCH 35/55] Cleanup --- src/mdio/core/v1/_overloads.py | 8 ++------ src/mdio/core/v1/_serializer.py | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py index e7bcd0fc..c5b8eebc 100644 --- a/src/mdio/core/v1/_overloads.py +++ b/src/mdio/core/v1/_overloads.py @@ -2,7 +2,6 @@ The intent of overloading here is: 1. To provide a consistent mdio.* naming scheme. -2. To simplify the API for users where it makes sense (e.g. MDIO v1 uses Zarr and not HDF5). 
""" from collections.abc import Mapping @@ -23,8 +22,7 @@ def to_mdio( *args: str | int | float | bool, **kwargs: Mapping[str, str | int | float | bool], ) -> None: - """Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store.""" - print("👋 hello world from mdio.to_mdio!") + """Alias for `.to_zarr()`.""" # Ensure zarr_version=2 by default unless explicitly overridden zarr_version = kwargs.get("zarr_version", 2) if zarr_version != 2: @@ -44,8 +42,7 @@ def to_mdio( *args: str | int | float | bool, **kwargs: Mapping[str, str | int | float | bool], ) -> None: - """Alias for `.to_zarr()`, prints a greeting, and writes to Zarr store.""" - print("👋 hello world from mdio.to_mdio!") + """Alias for `.to_zarr()`, and writes to Zarr store.""" # Ensure zarr_version=2 by default unless explicitly overridden zarr_version = kwargs.get("zarr_version", 2) if zarr_version != 2: @@ -72,7 +69,6 @@ def open( Casts the returned xarray.Dataset (and its variables) to the MDIO subclasses. """ - print("👋 hello world from mdio.open!") ds = xr.open_dataset( store, *args, diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index 271f1b57..5bc971d0 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -30,7 +30,7 @@ import zfpy as zfpy_base # Base library from numcodecs import ZFPY # Codec except ImportError: - print(f"Tried to import zfpy and numcodecs zfpy but failed because {ImportError}") + logging.warning(f"Tried to import zfpy and numcodecs zfpy but failed because {ImportError}") zfpy_base = None ZFPY = None From 617a690cfb6786b671b39f781051cd02f46095f2 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 28 May 2025 15:20:59 +0000 Subject: [PATCH 36/55] Cleanup --- src/mdio/schemas/core.py | 54 +++++++++++++--------------------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/src/mdio/schemas/core.py b/src/mdio/schemas/core.py index 4bbdc6fb..d0b7460a 100644 --- a/src/mdio/schemas/core.py +++ 
b/src/mdio/schemas/core.py @@ -10,44 +10,24 @@ from pydantic.alias_generators import to_camel from pydantic import Field - -# def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: -# """Extract Pydantic BaseModel fields. - -# Args: -# model: (Type) The model object for which the fields will be extracted. - -# Returns: -# A dictionary containing the fields of the model along with -# their corresponding types and default values. - -# Example: -# >>> class MyModel(BaseModel): -# ... name: str -# ... age: int = 0 -# ... -# >>> model_fields(MyModel) -# {'name': (str, ), 'age': (int, 0)} -# """ -# annotations = get_type_hints(model) - -# fields = {} -# for field_name, field in model.model_fields.items(): -# fields[field_name] = (annotations[field_name], field) - -# return fields - -# def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: -# """Return fields suitable for use in create_model with correct types and defaults.""" -# fields = {} -# for field_name, field_info in model.model_fields.items(): -# annotated_type = field_info.annotation -# default = field_info.default if field_info.default is not None else ... -# fields[field_name] = (annotated_type, Field(default, description=field_info.description)) -# return fields - def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: - """Safely extract fields for create_model, preserving optionality and default behavior.""" + """Extract Pydantic BaseModel fields. + + Args: + model: (Type) The model object for which the fields will be extracted. + + Returns: + A dictionary containing the fields of the model along with + their corresponding types and default values. + + Example: + >>> class MyModel(BaseModel): + ... name: str + ... age: int = 0 + ... 
+ >>> model_fields(MyModel) + {'name': (str, ), 'age': (int, 0)} + """ fields = {} for field_name, field_info in model.model_fields.items(): annotated_type = field_info.annotation From 308d79ef5cc7e4eb292b5c854c0eaf680e46bc9e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 28 May 2025 15:23:38 +0000 Subject: [PATCH 37/55] Relock deps --- uv.lock | 334 +++++++------------------------------------------------- 1 file changed, 37 insertions(+), 297 deletions(-) diff --git a/uv.lock b/uv.lock index 04a86254..a9b23860 100644 --- a/uv.lock +++ b/uv.lock @@ -279,21 +279,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537 }, ] -[[package]] -name = "bandit" -version = "1.8.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "pyyaml" }, - { name = "rich" }, - { name = "stevedore" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1a/a5/144a45f8e67df9d66c3bc3f7e69a39537db8bff1189ab7cff4e9459215da/bandit-1.8.3.tar.gz", hash = "sha256:f5847beb654d309422985c36644649924e0ea4425c76dec2e89110b87506193a", size = 4232005 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/85/db74b9233e0aa27ec96891045c5e920a64dd5cbccd50f8e64e9460f48d35/bandit-1.8.3-py3-none-any.whl", hash = "sha256:28f04dc0d258e1dd0f99dee8eefa13d1cb5e3fde1a5ab0c523971f97b289bcd8", size = 129078 }, -] - [[package]] name = "beautifulsoup4" version = "4.13.4" @@ -304,35 +289,7 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f9/49/6abb616eb3cbab6a7cca303dc02fdf3836de2e0b834bf966a7f5271a34d8/beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16", size = 186015 }, -] - -[[package]] -name = "black" -version = "24.10.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d8/0d/cc2fb42b8c50d80143221515dd7e4766995bd07c56c9a3ed30baf080b6dc/black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875", size = 645813 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/cc/7496bb63a9b06a954d3d0ac9fe7a73f3bf1cd92d7a58877c27f4ad1e9d41/black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad", size = 1607468 }, - { url = "https://files.pythonhosted.org/packages/2b/e3/69a738fb5ba18b5422f50b4f143544c664d7da40f09c13969b2fd52900e0/black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50", size = 1437270 }, - { url = "https://files.pythonhosted.org/packages/c9/9b/2db8045b45844665c720dcfe292fdaf2e49825810c0103e1191515fc101a/black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392", size = 1737061 }, - { url = "https://files.pythonhosted.org/packages/a3/95/17d4a09a5be5f8c65aa4a361444d95edc45def0de887810f508d3f65db7a/black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175", size = 1423293 }, - { url = 
"https://files.pythonhosted.org/packages/90/04/bf74c71f592bcd761610bbf67e23e6a3cff824780761f536512437f1e655/black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3", size = 1644256 }, - { url = "https://files.pythonhosted.org/packages/4c/ea/a77bab4cf1887f4b2e0bce5516ea0b3ff7d04ba96af21d65024629afedb6/black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65", size = 1448534 }, - { url = "https://files.pythonhosted.org/packages/4e/3e/443ef8bc1fbda78e61f79157f303893f3fddf19ca3c8989b163eb3469a12/black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f", size = 1761892 }, - { url = "https://files.pythonhosted.org/packages/52/93/eac95ff229049a6901bc84fec6908a5124b8a0b7c26ea766b3b8a5debd22/black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8", size = 1434796 }, - { url = "https://files.pythonhosted.org/packages/d0/a0/a993f58d4ecfba035e61fca4e9f64a2ecae838fc9f33ab798c62173ed75c/black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981", size = 1643986 }, - { url = "https://files.pythonhosted.org/packages/37/d5/602d0ef5dfcace3fb4f79c436762f130abd9ee8d950fa2abdbf8bbc555e0/black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b", size = 1448085 }, - { url = "https://files.pythonhosted.org/packages/47/6d/a3a239e938960df1a662b93d6230d4f3e9b4a22982d060fc38c42f45a56b/black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2", size = 1760928 }, - { url = 
"https://files.pythonhosted.org/packages/dd/cf/af018e13b0eddfb434df4d9cd1b2b7892bab119f7a20123e93f6910982e8/black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b", size = 1436875 }, - { url = "https://files.pythonhosted.org/packages/8d/a7/4b27c50537ebca8bec139b872861f9d2bf501c5ec51fcf897cb924d9e264/black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d", size = 206898 }, + { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285 }, ] [[package]] @@ -720,43 +677,36 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/13/1f/9fa001e74a1993a9cadd2333bb889e50c66327b8594ac538ab8a04f915b7/cryptography-45.0.3.tar.gz", hash = "sha256:ec21313dd335c51d7877baf2972569f40a4291b76a0ce51391523ae358d05899", size = 744738 } wheels = [ - { url = "https://files.pythonhosted.org/packages/92/ef/83e632cfa801b221570c5f58c0369db6fa6cef7d9ff859feab1aae1a8a0f/cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7", size = 6676361 }, - { url = "https://files.pythonhosted.org/packages/30/ec/7ea7c1e4c8fc8329506b46c6c4a52e2f20318425d48e0fe597977c71dbce/cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1", size = 3952350 }, - { url = "https://files.pythonhosted.org/packages/27/61/72e3afdb3c5ac510330feba4fc1faa0fe62e070592d6ad00c40bb69165e5/cryptography-44.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb", size = 4166572 }, - { url = 
"https://files.pythonhosted.org/packages/26/e4/ba680f0b35ed4a07d87f9e98f3ebccb05091f3bf6b5a478b943253b3bbd5/cryptography-44.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843", size = 3958124 }, - { url = "https://files.pythonhosted.org/packages/9c/e8/44ae3e68c8b6d1cbc59040288056df2ad7f7f03bbcaca6b503c737ab8e73/cryptography-44.0.2-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5", size = 3678122 }, - { url = "https://files.pythonhosted.org/packages/27/7b/664ea5e0d1eab511a10e480baf1c5d3e681c7d91718f60e149cec09edf01/cryptography-44.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c", size = 4191831 }, - { url = "https://files.pythonhosted.org/packages/2a/07/79554a9c40eb11345e1861f46f845fa71c9e25bf66d132e123d9feb8e7f9/cryptography-44.0.2-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a", size = 3960583 }, - { url = "https://files.pythonhosted.org/packages/bb/6d/858e356a49a4f0b591bd6789d821427de18432212e137290b6d8a817e9bf/cryptography-44.0.2-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308", size = 4191753 }, - { url = "https://files.pythonhosted.org/packages/b2/80/62df41ba4916067fa6b125aa8c14d7e9181773f0d5d0bd4dcef580d8b7c6/cryptography-44.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688", size = 4079550 }, - { url = "https://files.pythonhosted.org/packages/f3/cd/2558cc08f7b1bb40683f99ff4327f8dcfc7de3affc669e9065e14824511b/cryptography-44.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7", size = 4298367 }, - { url = 
"https://files.pythonhosted.org/packages/71/59/94ccc74788945bc3bd4cf355d19867e8057ff5fdbcac781b1ff95b700fb1/cryptography-44.0.2-cp37-abi3-win32.whl", hash = "sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79", size = 2772843 }, - { url = "https://files.pythonhosted.org/packages/ca/2c/0d0bbaf61ba05acb32f0841853cfa33ebb7a9ab3d9ed8bb004bd39f2da6a/cryptography-44.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa", size = 3209057 }, - { url = "https://files.pythonhosted.org/packages/9e/be/7a26142e6d0f7683d8a382dd963745e65db895a79a280a30525ec92be890/cryptography-44.0.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3", size = 6677789 }, - { url = "https://files.pythonhosted.org/packages/06/88/638865be7198a84a7713950b1db7343391c6066a20e614f8fa286eb178ed/cryptography-44.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639", size = 3951919 }, - { url = "https://files.pythonhosted.org/packages/d7/fc/99fe639bcdf58561dfad1faa8a7369d1dc13f20acd78371bb97a01613585/cryptography-44.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd", size = 4167812 }, - { url = "https://files.pythonhosted.org/packages/53/7b/aafe60210ec93d5d7f552592a28192e51d3c6b6be449e7fd0a91399b5d07/cryptography-44.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181", size = 3958571 }, - { url = "https://files.pythonhosted.org/packages/16/32/051f7ce79ad5a6ef5e26a92b37f172ee2d6e1cce09931646eef8de1e9827/cryptography-44.0.2-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea", size = 3679832 }, - { url = 
"https://files.pythonhosted.org/packages/78/2b/999b2a1e1ba2206f2d3bca267d68f350beb2b048a41ea827e08ce7260098/cryptography-44.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699", size = 4193719 }, - { url = "https://files.pythonhosted.org/packages/72/97/430e56e39a1356e8e8f10f723211a0e256e11895ef1a135f30d7d40f2540/cryptography-44.0.2-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9", size = 3960852 }, - { url = "https://files.pythonhosted.org/packages/89/33/c1cf182c152e1d262cac56850939530c05ca6c8d149aa0dcee490b417e99/cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23", size = 4193906 }, - { url = "https://files.pythonhosted.org/packages/e1/99/87cf26d4f125380dc674233971069bc28d19b07f7755b29861570e513650/cryptography-44.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922", size = 4081572 }, - { url = "https://files.pythonhosted.org/packages/b3/9f/6a3e0391957cc0c5f84aef9fbdd763035f2b52e998a53f99345e3ac69312/cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4", size = 4298631 }, - { url = "https://files.pythonhosted.org/packages/e2/a5/5bc097adb4b6d22a24dea53c51f37e480aaec3465285c253098642696423/cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5", size = 2773792 }, - { url = "https://files.pythonhosted.org/packages/33/cf/1f7649b8b9a3543e042d3f348e398a061923ac05b507f3f4d95f11938aa9/cryptography-44.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6", size = 3210957 }, - { url = 
"https://files.pythonhosted.org/packages/d6/d7/f30e75a6aa7d0f65031886fa4a1485c2fbfe25a1896953920f6a9cfe2d3b/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:909c97ab43a9c0c0b0ada7a1281430e4e5ec0458e6d9244c0e821bbf152f061d", size = 3887513 }, - { url = "https://files.pythonhosted.org/packages/9c/b4/7a494ce1032323ca9db9a3661894c66e0d7142ad2079a4249303402d8c71/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:96e7a5e9d6e71f9f4fca8eebfd603f8e86c5225bb18eb621b2c1e50b290a9471", size = 4107432 }, - { url = "https://files.pythonhosted.org/packages/45/f8/6b3ec0bc56123b344a8d2b3264a325646d2dcdbdd9848b5e6f3d37db90b3/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d1b3031093a366ac767b3feb8bcddb596671b3aaff82d4050f984da0c248b615", size = 3891421 }, - { url = "https://files.pythonhosted.org/packages/57/ff/f3b4b2d007c2a646b0f69440ab06224f9cf37a977a72cdb7b50632174e8a/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:04abd71114848aa25edb28e225ab5f268096f44cf0127f3d36975bdf1bdf3390", size = 4107081 }, -] - -[[package]] -name = "darglint" -version = "1.8.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d4/2c/86e8549e349388c18ca8a4ff8661bb5347da550f598656d32a98eaaf91cc/darglint-1.8.1.tar.gz", hash = "sha256:080d5106df149b199822e7ee7deb9c012b49891538f14a11be681044f0bb20da", size = 74435 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/28/85d1e0396d64422c5218d68e5cdcc53153aa8a2c83c7dbc3ee1502adf3a1/darglint-1.8.1-py3-none-any.whl", hash = "sha256:5ae11c259c17b0701618a20c3da343a3eb98b3bc4b5a83d31cdd94f5ebdced8d", size = 120767 }, + { url = "https://files.pythonhosted.org/packages/82/b2/2345dc595998caa6f68adf84e8f8b50d18e9fc4638d32b22ea8daedd4b7a/cryptography-45.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = 
"sha256:7573d9eebaeceeb55285205dbbb8753ac1e962af3d9640791d12b36864065e71", size = 7056239 }, + { url = "https://files.pythonhosted.org/packages/71/3d/ac361649a0bfffc105e2298b720d8b862330a767dab27c06adc2ddbef96a/cryptography-45.0.3-cp311-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d377dde61c5d67eb4311eace661c3efda46c62113ff56bf05e2d679e02aebb5b", size = 4205541 }, + { url = "https://files.pythonhosted.org/packages/70/3e/c02a043750494d5c445f769e9c9f67e550d65060e0bfce52d91c1362693d/cryptography-45.0.3-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fae1e637f527750811588e4582988932c222f8251f7b7ea93739acb624e1487f", size = 4433275 }, + { url = "https://files.pythonhosted.org/packages/40/7a/9af0bfd48784e80eef3eb6fd6fde96fe706b4fc156751ce1b2b965dada70/cryptography-45.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ca932e11218bcc9ef812aa497cdf669484870ecbcf2d99b765d6c27a86000942", size = 4209173 }, + { url = "https://files.pythonhosted.org/packages/31/5f/d6f8753c8708912df52e67969e80ef70b8e8897306cd9eb8b98201f8c184/cryptography-45.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:af3f92b1dc25621f5fad065288a44ac790c5798e986a34d393ab27d2b27fcff9", size = 3898150 }, + { url = "https://files.pythonhosted.org/packages/8b/50/f256ab79c671fb066e47336706dc398c3b1e125f952e07d54ce82cf4011a/cryptography-45.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2f8f8f0b73b885ddd7f3d8c2b2234a7d3ba49002b0223f58cfde1bedd9563c56", size = 4466473 }, + { url = "https://files.pythonhosted.org/packages/62/e7/312428336bb2df0848d0768ab5a062e11a32d18139447a76dfc19ada8eed/cryptography-45.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9cc80ce69032ffa528b5e16d217fa4d8d4bb7d6ba8659c1b4d74a1b0f4235fca", size = 4211890 }, + { url = "https://files.pythonhosted.org/packages/e7/53/8a130e22c1e432b3c14896ec5eb7ac01fb53c6737e1d705df7e0efb647c6/cryptography-45.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", 
hash = "sha256:c824c9281cb628015bfc3c59335163d4ca0540d49de4582d6c2637312907e4b1", size = 4466300 }, + { url = "https://files.pythonhosted.org/packages/ba/75/6bb6579688ef805fd16a053005fce93944cdade465fc92ef32bbc5c40681/cryptography-45.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:5833bb4355cb377ebd880457663a972cd044e7f49585aee39245c0d592904578", size = 4332483 }, + { url = "https://files.pythonhosted.org/packages/2f/11/2538f4e1ce05c6c4f81f43c1ef2bd6de7ae5e24ee284460ff6c77e42ca77/cryptography-45.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9bb5bf55dcb69f7067d80354d0a348368da907345a2c448b0babc4215ccd3497", size = 4573714 }, + { url = "https://files.pythonhosted.org/packages/f5/bb/e86e9cf07f73a98d84a4084e8fd420b0e82330a901d9cac8149f994c3417/cryptography-45.0.3-cp311-abi3-win32.whl", hash = "sha256:3ad69eeb92a9de9421e1f6685e85a10fbcfb75c833b42cc9bc2ba9fb00da4710", size = 2934752 }, + { url = "https://files.pythonhosted.org/packages/c7/75/063bc9ddc3d1c73e959054f1fc091b79572e716ef74d6caaa56e945b4af9/cryptography-45.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:97787952246a77d77934d41b62fb1b6f3581d83f71b44796a4158d93b8f5c490", size = 3412465 }, + { url = "https://files.pythonhosted.org/packages/71/9b/04ead6015229a9396890d7654ee35ef630860fb42dc9ff9ec27f72157952/cryptography-45.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:c92519d242703b675ccefd0f0562eb45e74d438e001f8ab52d628e885751fb06", size = 7031892 }, + { url = "https://files.pythonhosted.org/packages/46/c7/c7d05d0e133a09fc677b8a87953815c522697bdf025e5cac13ba419e7240/cryptography-45.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5edcb90da1843df85292ef3a313513766a78fbbb83f584a5a58fb001a5a9d57", size = 4196181 }, + { url = "https://files.pythonhosted.org/packages/08/7a/6ad3aa796b18a683657cef930a986fac0045417e2dc428fd336cfc45ba52/cryptography-45.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:38deed72285c7ed699864f964a3f4cf11ab3fb38e8d39cfcd96710cd2b5bb716", size = 4423370 }, + { url = "https://files.pythonhosted.org/packages/4f/58/ec1461bfcb393525f597ac6a10a63938d18775b7803324072974b41a926b/cryptography-45.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5555365a50efe1f486eed6ac7062c33b97ccef409f5970a0b6f205a7cfab59c8", size = 4197839 }, + { url = "https://files.pythonhosted.org/packages/d4/3d/5185b117c32ad4f40846f579369a80e710d6146c2baa8ce09d01612750db/cryptography-45.0.3-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9e4253ed8f5948a3589b3caee7ad9a5bf218ffd16869c516535325fece163dcc", size = 3886324 }, + { url = "https://files.pythonhosted.org/packages/67/85/caba91a57d291a2ad46e74016d1f83ac294f08128b26e2a81e9b4f2d2555/cryptography-45.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cfd84777b4b6684955ce86156cfb5e08d75e80dc2585e10d69e47f014f0a5342", size = 4450447 }, + { url = "https://files.pythonhosted.org/packages/ae/d1/164e3c9d559133a38279215c712b8ba38e77735d3412f37711b9f8f6f7e0/cryptography-45.0.3-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:a2b56de3417fd5f48773ad8e91abaa700b678dc7fe1e0c757e1ae340779acf7b", size = 4200576 }, + { url = "https://files.pythonhosted.org/packages/71/7a/e002d5ce624ed46dfc32abe1deff32190f3ac47ede911789ee936f5a4255/cryptography-45.0.3-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:57a6500d459e8035e813bd8b51b671977fb149a8c95ed814989da682314d0782", size = 4450308 }, + { url = "https://files.pythonhosted.org/packages/87/ad/3fbff9c28cf09b0a71e98af57d74f3662dea4a174b12acc493de00ea3f28/cryptography-45.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f22af3c78abfbc7cbcdf2c55d23c3e022e1a462ee2481011d518c7fb9c9f3d65", size = 4325125 }, + { url = "https://files.pythonhosted.org/packages/f5/b4/51417d0cc01802304c1984d76e9592f15e4801abd44ef7ba657060520bf0/cryptography-45.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:232954730c362638544758a8160c4ee1b832dc011d2c41a306ad8f7cccc5bb0b", size = 4560038 }, + { url = "https://files.pythonhosted.org/packages/80/38/d572f6482d45789a7202fb87d052deb7a7b136bf17473ebff33536727a2c/cryptography-45.0.3-cp37-abi3-win32.whl", hash = "sha256:cb6ab89421bc90e0422aca911c69044c2912fc3debb19bb3c1bfe28ee3dff6ab", size = 2924070 }, + { url = "https://files.pythonhosted.org/packages/91/5a/61f39c0ff4443651cc64e626fa97ad3099249152039952be8f344d6b0c86/cryptography-45.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:d54ae41e6bd70ea23707843021c778f151ca258081586f0cfa31d936ae43d1b2", size = 3395005 }, + { url = "https://files.pythonhosted.org/packages/e7/d4/58a246342093a66af8935d6aa59f790cbb4731adae3937b538d054bdc2f9/cryptography-45.0.3-pp311-pypy311_pp73-macosx_10_9_x86_64.whl", hash = "sha256:edd6d51869beb7f0d472e902ef231a9b7689508e83880ea16ca3311a00bf5ce7", size = 3589802 }, + { url = "https://files.pythonhosted.org/packages/96/61/751ebea58c87b5be533c429f01996050a72c7283b59eee250275746632ea/cryptography-45.0.3-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:555e5e2d3a53b4fabeca32835878b2818b3f23966a4efb0d566689777c5a12c8", size = 4146964 }, + { url = "https://files.pythonhosted.org/packages/8d/01/28c90601b199964de383da0b740b5156f5d71a1da25e7194fdf793d373ef/cryptography-45.0.3-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:25286aacb947286620a31f78f2ed1a32cded7be5d8b729ba3fb2c988457639e4", size = 4388103 }, + { url = "https://files.pythonhosted.org/packages/3d/ec/cd892180b9e42897446ef35c62442f5b8b039c3d63a05f618aa87ec9ebb5/cryptography-45.0.3-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:050ce5209d5072472971e6efbfc8ec5a8f9a841de5a4db0ebd9c2e392cb81972", size = 4150031 }, + { url = "https://files.pythonhosted.org/packages/db/d4/22628c2dedd99289960a682439c6d3aa248dff5215123ead94ac2d82f3f5/cryptography-45.0.3-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = 
"sha256:dc10ec1e9f21f33420cc05214989544727e776286c1c16697178978327b95c9c", size = 4387389 }, + { url = "https://files.pythonhosted.org/packages/39/ec/ba3961abbf8ecb79a3586a4ff0ee08c9d7a9938b4312fb2ae9b63f48a8ba/cryptography-45.0.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:9eda14f049d7f09c2e8fb411dda17dd6b16a3c76a1de5e249188a32aeb92de19", size = 3337432 }, ] [[package]] @@ -915,73 +865,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, ] -[[package]] -name = "flake8" -version = "7.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mccabe" }, - { name = "pycodestyle" }, - { name = "pyflakes" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e7/c4/5842fc9fc94584c455543540af62fd9900faade32511fab650e9891ec225/flake8-7.2.0.tar.gz", hash = "sha256:fa558ae3f6f7dbf2b4f22663e5343b6b6023620461f8d4ff2019ef4b5ee70426", size = 48177 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/83/5c/0627be4c9976d56b1217cb5187b7504e7fd7d3503f8bfd312a04077bd4f7/flake8-7.2.0-py2.py3-none-any.whl", hash = "sha256:93b92ba5bdb60754a6da14fa3b93a9361fd00a59632ada61fd7b130436c40343", size = 57786 }, -] - -[[package]] -name = "flake8-bandit" -version = "4.1.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "bandit" }, - { name = "flake8" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/77/1c/4f66a7a52a246d6c64312b5c40da3af3630cd60b27af81b137796af3c0bc/flake8_bandit-4.1.1.tar.gz", hash = "sha256:068e09287189cbfd7f986e92605adea2067630b75380c6b5733dab7d87f9a84e", size = 5403 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/5f/55bab0ac89f9ad9f4c6e38087faa80c252daec4ccb7776b4dac216ca9e3f/flake8_bandit-4.1.1-py3-none-any.whl", hash = 
"sha256:4c8a53eb48f23d4ef1e59293657181a3c989d0077c9952717e98a0eace43e06d", size = 4828 }, -] - -[[package]] -name = "flake8-bugbear" -version = "24.12.12" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = "flake8" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c7/25/48ba712ff589b0149f21135234f9bb45c14d6689acc6151b5e2ff8ac2ae9/flake8_bugbear-24.12.12.tar.gz", hash = "sha256:46273cef0a6b6ff48ca2d69e472f41420a42a46e24b2a8972e4f0d6733d12a64", size = 82907 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/21/0a875f75fbe4008bd171e2fefa413536258fe6b4cfaaa087986de74588f4/flake8_bugbear-24.12.12-py3-none-any.whl", hash = "sha256:1b6967436f65ca22a42e5373aaa6f2d87966ade9aa38d4baf2a1be550767545e", size = 36664 }, -] - -[[package]] -name = "flake8-docstrings" -version = "1.7.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "flake8" }, - { name = "pydocstyle" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/93/24/f839e3a06e18f4643ccb81370909a497297909f15106e6af2fecdef46894/flake8_docstrings-1.7.0.tar.gz", hash = "sha256:4c8cc748dc16e6869728699e5d0d685da9a10b0ea718e090b1ba088e67a941af", size = 5995 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/7d/76a278fa43250441ed9300c344f889c7fb1817080c8fb8996b840bf421c2/flake8_docstrings-1.7.0-py2.py3-none-any.whl", hash = "sha256:51f2344026da083fc084166a9353f5082b01f72901df422f74b4d953ae88ac75", size = 4994 }, -] - -[[package]] -name = "flake8-rst-docstrings" -version = "0.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "flake8" }, - { name = "pygments" }, - { name = "restructuredtext-lint" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b2/e5/013d5858b69b4ba38ff259d55bd8d107009f212f296be0824b7c4a27d7ed/flake8-rst-docstrings-0.3.0.tar.gz", hash = "sha256:d1ce22b4bd37b73cd86b8d980e946ef198cfcc18ed82fedb674ceaa2f8d1afa4", size = 19865 } 
-wheels = [ - { url = "https://files.pythonhosted.org/packages/12/bf/0e6933d78d172df672325622bf1b7f8364f4a6515da9e89398227c19d02e/flake8_rst_docstrings-0.3.0-py3-none-any.whl", hash = "sha256:f8c3c6892ff402292651c31983a38da082480ad3ba253743de52989bdc84ca1c", size = 10892 }, -] - [[package]] name = "flexcache" version = "0.3" @@ -1443,15 +1326,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 }, ] -[[package]] -name = "isort" -version = "5.13.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/87/f9/c1eb8635a24e87ade2efce21e3ce8cd6b8630bb685ddc9cdaca1349b2eb5/isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109", size = 175303 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/b3/8def84f539e7d2289a02f0524b944b15d7c75dab7628bedf1c4f0992029c/isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6", size = 92310 }, -] - [[package]] name = "jedi" version = "0.19.2" @@ -1695,15 +1569,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, ] -[[package]] -name = "mccabe" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/ff/0ffefdcac38932a54d2b5eed4e0ba8a408f215002cd178ad1df0f2806ff8/mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", size = 9658 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e", size = 7350 }, -] - [[package]] name = "mdit-py-plugins" version = "0.4.2" @@ -1905,23 +1770,14 @@ lossy = [ [package.dev-dependencies] dev = [ - { name = "black" }, { name = "coverage", extra = ["toml"] }, - { name = "darglint" }, - { name = "flake8" }, - { name = "flake8-bandit" }, - { name = "flake8-bugbear" }, - { name = "flake8-docstrings" }, - { name = "flake8-rst-docstrings" }, - { name = "isort" }, { name = "mypy" }, - { name = "pep8-naming" }, { name = "pre-commit" }, { name = "pre-commit-hooks" }, { name = "pygments" }, { name = "pytest" }, { name = "pytest-dependency" }, - { name = "pyupgrade" }, + { name = "ruff" }, { name = "safety" }, { name = "typeguard" }, { name = "xdoctest", extra = ["colors"] }, @@ -1948,16 +1804,14 @@ requires-dist = [ { name = "fsspec", specifier = ">=2024.10.0" }, { name = "gcsfs", marker = "extra == 'cloud'", specifier = ">=2024.10.0" }, { name = "pint", specifier = ">=0.24.3,<0.25" }, - { name = "pint", specifier = ">=0.24.3,<0.25" }, { name = "psutil", specifier = ">=6.1.0,<7.0.0" }, { name = "pydantic", specifier = ">=2.8.2,<3.0.0" }, { name = "pydantic-settings", specifier = ">=2.4.0,<3.0.0" }, - { name = "pydantic", specifier = ">=2.8.2,<3.0.0" }, - { name = "pydantic-settings", specifier = ">=2.4.0,<3.0.0" }, { name = "rich", specifier = ">=13.9.4,<14.0.0" }, { name = "s3fs", marker = "extra == 'cloud'", specifier = "==2024.12.0" }, { name = "segy", specifier = ">=0.4.0,<0.5.0" }, { name = "tqdm", specifier = ">=4.67.0,<5.0.0" }, + { name = "xarray", specifier = ">=2025.3.1" }, { name = "xarray", specifier = ">=2025.4.0" }, { name = "zarr", specifier = ">=3.0.8,<4.0.0" }, { name = "zfpy", marker = "extra == 'lossy'", specifier = ">=1.0.1,<2.0.0" }, @@ -1966,23 +1820,14 @@ provides-extras = 
["cloud", "distributed", "lossy"] [package.metadata.requires-dev] dev = [ - { name = "black", specifier = ">=24.10.0,<25" }, { name = "coverage", extras = ["toml"], specifier = ">=7.6.7,<8" }, - { name = "darglint", specifier = ">=1.8.1,<2" }, - { name = "flake8", specifier = ">=7.1.0,<8" }, - { name = "flake8-bandit", specifier = ">=4.1.1,<5" }, - { name = "flake8-bugbear", specifier = ">=24.4.26,<25" }, - { name = "flake8-docstrings", specifier = ">=1.7.0,<2" }, - { name = "flake8-rst-docstrings", specifier = ">=0.3.0,<0.4" }, - { name = "isort", specifier = ">=5.13.2,<6" }, { name = "mypy", specifier = ">=1.13.0,<2" }, - { name = "pep8-naming", specifier = ">=0.14.1,<0.15" }, { name = "pre-commit", specifier = ">=4.0.1,<5" }, { name = "pre-commit-hooks", specifier = ">=5.0.0,<6" }, { name = "pygments", specifier = ">=2.18.0,<3" }, { name = "pytest", specifier = ">=8.3.3,<9" }, { name = "pytest-dependency", specifier = ">=0.6.0,<0.7" }, - { name = "pyupgrade", specifier = ">=3.19.0,<4" }, + { name = "ruff", specifier = ">=0.11.8" }, { name = "safety", specifier = ">=3.2.3,<4" }, { name = "typeguard", specifier = ">=4.4.1,<5" }, { name = "xdoctest", extras = ["colors"], specifier = ">=1.2.0,<2" }, @@ -1991,7 +1836,7 @@ docs = [ { name = "furo", specifier = ">=2024.8.6" }, { name = "linkify-it-py", specifier = ">=2.0.3" }, { name = "myst-nb", specifier = ">=1.2.0" }, - { name = "sphinx", specifier = ">=8.1.3,<9" }, + { name = "sphinx", specifier = ">=8.2.3,<9" }, { name = "sphinx-autobuild", specifier = ">=2024.10.3" }, { name = "sphinx-click", specifier = ">=6.0.0,<7" }, { name = "sphinx-copybutton", specifier = ">=0.5.2,<0.6" }, @@ -2336,39 +2181,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl", hash = "sha256:978e4ac767ec4ba5b86c6eaa52e5a2a3bc748a2ca839e8cc798f1cc6ce6efb0f", size = 18905 }, ] -[[package]] -name = "pathspec" -version = "0.12.1" -source 
= { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, -] - -[[package]] -name = "pbr" -version = "6.1.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "setuptools" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/01/d2/510cc0d218e753ba62a1bc1434651db3cd797a9716a0a66cc714cb4f0935/pbr-6.1.1.tar.gz", hash = "sha256:93ea72ce6989eb2eed99d0f75721474f69ad88128afdef5ac377eb797c4bf76b", size = 125702 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/47/ac/684d71315abc7b1214d59304e23a982472967f6bf4bde5a98f1503f648dc/pbr-6.1.1-py2.py3-none-any.whl", hash = "sha256:38d4daea5d9fa63b3f626131b9d34947fd0c8be9b05a29276870580050a25a76", size = 108997 }, -] - -[[package]] -name = "pep8-naming" -version = "0.14.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "flake8" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/be/8e/1de32e908d8b008bb9352bfe7749aedecb71e2793d36c7ee342716acd1ec/pep8-naming-0.14.1.tar.gz", hash = "sha256:1ef228ae80875557eb6c1549deafed4dabbf3261cfcafa12f773fe0db9be8a36", size = 16546 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/a2/450b71d1a87fcee50a7b994a53b1c68fc6a6b718df0eb035f2bffb2d3a4f/pep8_naming-0.14.1-py3-none-any.whl", hash = "sha256:63f514fc777d715f935faf185dedd679ab99526a7f2f503abb61587877f7b1c5", size = 8859 }, -] - [[package]] name = "pexpect" version = "4.9.0" @@ -2666,15 +2478,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259 }, ] -[[package]] -name = "pycodestyle" -version = "2.13.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/04/6e/1f4a62078e4d95d82367f24e685aef3a672abfd27d1a868068fed4ed2254/pycodestyle-2.13.0.tar.gz", hash = "sha256:c8415bf09abe81d9c7f872502a6eee881fbe85d8763dd5b9924bb0a01d67efae", size = 39312 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/be/b00116df1bfb3e0bb5b45e29d604799f7b91dd861637e4d448b4e09e6a3e/pycodestyle-2.13.0-py2.py3-none-any.whl", hash = "sha256:35863c5974a271c7a726ed228a14a4f6daf49df369d8c50cd9a6f58a5e143ba9", size = 31424 }, -] - [[package]] name = "pycparser" version = "2.22" @@ -2756,28 +2559,7 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/67/1d/42628a2c33e93f8e9acbde0d5d735fa0850f3e6a2f8cb1eb6c40b9a732ac/pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268", size = 163234 } wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/53/a64f03044927dc47aafe029c42a5b7aabc38dfb813475e0e1bf71c4a59d0/pydantic_settings-2.8.1-py3-none-any.whl", hash = "sha256:81942d5ac3d905f7f3ee1a70df5dfb62d5569c12f51a5a647defc1c3d9ee2e9c", size = 30839 }, -] - -[[package]] -name = "pydocstyle" -version = "6.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "snowballstemmer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e9/5c/d5385ca59fd065e3c6a5fe19f9bc9d5ea7f2509fa8c9c22fb6b2031dd953/pydocstyle-6.3.0.tar.gz", hash = "sha256:7ce43f0c0ac87b07494eb9c0b462c0b73e6ff276807f204d6b53edc72b7e44e1", size = 36796 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/36/ea/99ddefac41971acad68f14114f38261c1f27dac0b3ec529824ebc739bdaa/pydocstyle-6.3.0-py3-none-any.whl", hash = "sha256:118762d452a49d6b05e194ef344a55822987a462831ade91ec5c06fd2169d019", size = 38038 }, -] - -[[package]] -name = "pyflakes" -version = "3.3.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/af/cc/1df338bd7ed1fa7c317081dcf29bf2f01266603b301e6858856d346a12b3/pyflakes-3.3.2.tar.gz", hash = "sha256:6dfd61d87b97fba5dcfaaf781171ac16be16453be6d816147989e7f6e6a9576b", size = 64175 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/15/40/b293a4fa769f3b02ab9e387c707c4cbdc34f073f945de0386107d4e669e6/pyflakes-3.3.2-py2.py3-none-any.whl", hash = "sha256:5039c8339cbb1944045f4ee5466908906180f13cc99cc9949348d10f82a5c32a", size = 63164 }, + { url = "https://files.pythonhosted.org/packages/b6/5f/d6d641b490fd3ec2c4c13b4244d68deea3a1b970a97be64f34fb5504ff72/pydantic_settings-2.9.1-py3-none-any.whl", hash = "sha256:59b4f431b1defb26fe620c71a7d3968a710d719f5f4cdbbdb7926edeb770f6ef", size = 44356 }, ] [[package]] @@ -2858,18 +2640,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, ] -[[package]] -name = "pyupgrade" -version = "3.19.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "tokenize-rt" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/35/3a/efa8e75cf84d53f1b3f0113387ab120ef460396a4068e41b6cf18a3d216d/pyupgrade-3.19.1.tar.gz", hash = "sha256:d10e8c5f54b8327211828769e98d95d95e4715de632a3414f1eef3f51357b9e2", size = 45116 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/dd/43/c6c1ff945c7900613f6e6ef2a8688639a247d62eb0ffa9935c599f69c08e/pyupgrade-3.19.1-py2.py3-none-any.whl", hash = 
"sha256:8c5b0bfacae5ff30fa136a53eb7f22c34ba007450d4099e9da8089dabb9e67c9", size = 62412 }, -] - [[package]] name = "pywin32" version = "310" @@ -3132,15 +2902,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 }, ] -[[package]] -name = "restructuredtext-lint" -version = "1.4.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "docutils" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/48/9c/6d8035cafa2d2d314f34e6cd9313a299de095b26e96f1c7312878f988eec/restructuredtext_lint-1.4.0.tar.gz", hash = "sha256:1b235c0c922341ab6c530390892eb9e92f90b9b75046063e047cacfb0f050c45", size = 16723 } - [[package]] name = "rich" version = "13.9.4" @@ -3672,19 +3433,7 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/4b/528ccf7a982216885a1ff4908e886b8fb5f19862d1962f56a3fce2435a70/starlette-0.46.1-py3-none-any.whl", hash = "sha256:77c74ed9d2720138b25875133f3a2dae6d854af2ec37dceb56aef370c1d8a227", size = 71995 }, -] - -[[package]] -name = "stevedore" -version = "5.4.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pbr" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/28/3f/13cacea96900bbd31bb05c6b74135f85d15564fc583802be56976c940470/stevedore-5.4.1.tar.gz", hash = "sha256:3135b5ae50fe12816ef291baff420acb727fcd356106e3e9cbfa9e5985cd6f4b", size = 513858 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f7/45/8c4ebc0c460e6ec38e62ab245ad3c7fc10b210116cea7c16d61602aa9558/stevedore-5.4.1-py3-none-any.whl", hash = "sha256:d10a31c7b86cba16c1f6e8d15416955fc797052351a56af15e608ad20811fcfe", size = 49533 }, + { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037 }, ] [[package]] @@ -3705,15 +3454,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/44/aa5c8b10b2cce7a053018e0d132bd58e27527a0243c4985383d5b6fd93e9/tblib-3.1.0-py3-none-any.whl", hash = "sha256:670bb4582578134b3d81a84afa1b016128b429f3d48e6cbbaecc9d15675e984e", size = 12552 }, ] -[[package]] -name = "tokenize-rt" -version = "6.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6b/0a/5854d8ced8c1e00193d1353d13db82d7f813f99bd5dcb776ce3e2a4c0d19/tokenize_rt-6.1.0.tar.gz", hash = "sha256:e8ee836616c0877ab7c7b54776d2fefcc3bde714449a206762425ae114b53c86", size = 5506 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/87/ba/576aac29b10dfa49a6ce650001d1bb31f81e734660555eaf144bfe5b8995/tokenize_rt-6.1.0-py2.py3-none-any.whl", hash = "sha256:d706141cdec4aa5f358945abe36b911b8cbdc844545da99e811250c0cee9b6fc", size = 6015 }, -] - [[package]] name = "tenacity" version = "9.1.2" From 7b03da4cbbd92014b6bbb460b7a4b4ee748c09ad Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 28 May 2025 15:28:17 +0000 Subject: [PATCH 38/55] Cleanup pyproject toml --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 525c1735..d5605839 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,9 +34,7 @@ dependencies = [ "rich (>=13.9.4,<14.0.0)", "segy (>=0.4.0,<0.5.0)", "tqdm (>=4.67.0,<5.0.0)", - "xarray>=2025.3.1", "zarr (>=3.0.8,<4.0.0)", - "pint (>=0.24.3,<0.25)", 
"xarray (>=2025.4.0)", ] From 31c294b8c5a8578d5f732c248154889e9080c1fe Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 28 May 2025 15:29:22 +0000 Subject: [PATCH 39/55] Fix missing import --- src/mdio/core/v1/_serializer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index 5bc971d0..6f6ce959 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -26,6 +26,8 @@ from mdio.schemas.chunk_grid import * from mdio.schemas.v1.stats import * +import logging + try: import zfpy as zfpy_base # Base library from numcodecs import ZFPY # Codec From aa5cf859fc3bced1040152ae42cdee5b03f1b5b4 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 28 May 2025 15:40:33 +0000 Subject: [PATCH 40/55] Remove excess function import from init --- src/mdio/core/v1/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mdio/core/v1/__init__.py b/src/mdio/core/v1/__init__.py index afaa3bac..d8ec5b85 100644 --- a/src/mdio/core/v1/__init__.py +++ b/src/mdio/core/v1/__init__.py @@ -10,7 +10,6 @@ from ._serializer import make_named_dimension from ._serializer import make_variable from .builder import MDIODatasetBuilder -from .builder import write_mdio_metadata from .factory import SCHEMA_TEMPLATE_MAP from .factory import MDIOSchemaType @@ -22,7 +21,6 @@ "make_named_dimension", "make_variable", "mdio", - "write_mdio_metadata", "MDIOSchemaType", "SCHEMA_TEMPLATE_MAP", ] From aba7637bbfa084841775106a93260b6ebfcfd84d Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 28 May 2025 17:37:19 +0000 Subject: [PATCH 41/55] Linting --- src/mdio/core/v1/_overloads.py | 14 +++--- src/mdio/core/v1/_serializer.py | 18 ++++--- src/mdio/schemas/core.py | 6 +-- tests/test_main.py | 1 + tests/unit/test_schema.py | 85 +++++++++++++++++---------------- uv.lock | 1 - 6 files changed, 64 insertions(+), 61 deletions(-) diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py 
index c5b8eebc..55fb0a2b 100644 --- a/src/mdio/core/v1/_overloads.py +++ b/src/mdio/core/v1/_overloads.py @@ -25,8 +25,9 @@ def to_mdio( """Alias for `.to_zarr()`.""" # Ensure zarr_version=2 by default unless explicitly overridden zarr_version = kwargs.get("zarr_version", 2) - if zarr_version != 2: - raise ValueError("MDIO only supports zarr_version=2") + if zarr_version != 2: # noqa: PLR2004 + msg = "MDIO only supports zarr_version=2" + raise ValueError(msg) kwargs["zarr_version"] = zarr_version return super().to_zarr(*args, store=store, **kwargs) @@ -45,8 +46,9 @@ def to_mdio( """Alias for `.to_zarr()`, and writes to Zarr store.""" # Ensure zarr_version=2 by default unless explicitly overridden zarr_version = kwargs.get("zarr_version", 2) - if zarr_version != 2: - raise ValueError("MDIO only supports zarr_version=2") + if zarr_version != 2: # noqa: PLR2004 + msg = "MDIO only supports zarr_version=2" + raise ValueError(msg) kwargs["zarr_version"] = zarr_version return super().to_zarr(*args, store=store, **kwargs) @@ -80,9 +82,9 @@ def open( ds.__class__ = MDIODataset # Cast each DataArray in data_vars and coords - for _name, var in ds.data_vars.items(): + for _name, var in ds.data_vars.items(): # noqa: PERF102 .values() failed tests var.__class__ = MDIODataArray - for _name, coord in ds.coords.items(): + for _name, coord in ds.coords.items(): # noqa: PERF102 .values() failed tests coord.__class__ = MDIODataArray return ds diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index 6f6ce959..c6cc8146 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -11,6 +11,7 @@ from numcodecs import Blosc as NumcodecsBlosc from mdio.core.v1._overloads import mdio +from mdio.schemas.chunk_grid import * # noqa: F403 from mdio.schemas.compressors import ZFP from mdio.schemas.compressors import Blosc from mdio.schemas.dimension import NamedDimension @@ -19,20 +20,16 @@ from mdio.schemas.metadata import UserAttributes from 
mdio.schemas.v1.dataset import Dataset as MDIODataset from mdio.schemas.v1.dataset import DatasetMetadata +from mdio.schemas.v1.stats import * # noqa: F403 from mdio.schemas.v1.units import AllUnits from mdio.schemas.v1.variable import Coordinate from mdio.schemas.v1.variable import Variable from mdio.schemas.v1.variable import VariableMetadata -from mdio.schemas.chunk_grid import * -from mdio.schemas.v1.stats import * - -import logging try: import zfpy as zfpy_base # Base library from numcodecs import ZFPY # Codec except ImportError: - logging.warning(f"Tried to import zfpy and numcodecs zfpy but failed because {ImportError}") zfpy_base = None ZFPY = None @@ -87,9 +84,7 @@ def make_variable( # noqa: PLR0913 PLR0912 TypeError: If the metadata type is not supported. """ - # TODO(BrianMichell) #0: I suspect that this is only partially correct... - - def _to_serializable(val: Any) -> Any: + def _to_serializable(val: object) -> dict[str, Any] | object: return val.model_dump(mode="json", by_alias=True) if hasattr(val, "model_dump") else val var_metadata = None @@ -104,7 +99,9 @@ def _to_serializable(val: Any) -> Any: metadata_dict["unitsV1"] = val elif isinstance(md, UserAttributes): attrs = _to_serializable(md) - metadata_dict["attributes"] = attrs[0] if isinstance(attrs, list) and len(attrs) == 1 else attrs + metadata_dict["attributes"] = ( + attrs[0] if isinstance(attrs, list) and len(attrs) == 1 else attrs + ) var_metadata = VariableMetadata(**metadata_dict) elif isinstance(metadata, dict): @@ -121,7 +118,8 @@ def _to_serializable(val: Any) -> Any: var_metadata = metadata else: - raise TypeError(f"Unsupported metadata type: {type(metadata)}") + msg = f"Unsupported metadata type: {type(metadata)}" + raise TypeError(msg) return Variable( name=name, diff --git a/src/mdio/schemas/core.py b/src/mdio/schemas/core.py index d0b7460a..c61fd60b 100644 --- a/src/mdio/schemas/core.py +++ b/src/mdio/schemas/core.py @@ -3,12 +3,12 @@ from __future__ import annotations from typing 
import Any -from typing import get_type_hints from pydantic import BaseModel from pydantic import ConfigDict -from pydantic.alias_generators import to_camel from pydantic import Field +from pydantic.alias_generators import to_camel + def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: """Extract Pydantic BaseModel fields. @@ -57,7 +57,7 @@ class CamelCaseStrictModel(StrictModel): ser_json_by_alias=True, ) - def model_dump_json(self, *args, **kwargs): # type: ignore[override] + def model_dump_json(self, *args, **kwargs) -> dict: # noqa: ANN201 ANN001 ANN002 ANN003 """Dump JSON using camelCase aliases and excluding None values by default.""" # Ensure camelCase aliases if "by_alias" not in kwargs: diff --git a/tests/test_main.py b/tests/test_main.py index 1685c011..e0fb0131 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -30,6 +30,7 @@ def test_main_succeeds(runner: CliRunner, segy_input: Path, zarr_tmp: Path) -> N def test_main_cloud(runner: CliRunner, segy_input_uri: str, zarr_tmp: Path) -> None: """It exits with a status code of zero.""" os.environ["MDIO__IMPORT__CLOUD_NATIVE"] = "true" + os.environ["MDIO__IMPORT__CPU_COUNT"] = "1" cli_args = ["segy", "import", segy_input_uri, str(zarr_tmp)] cli_args.extend(["--header-locations", "181,185"]) cli_args.extend(["--header-names", "inline,crossline"]) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 012e3099..e862624f 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -311,24 +311,25 @@ class TestPydanticMDIORoundTrip: def test_json_to_mdio_dataset(self, tmp_path: Path) -> None: """Test converting TEST_SCHEMA JSON to an MDIO dataset using to_mdio.""" from mdio.core.v1._serializer import _construct_mdio_dataset - + output_path = tmp_path / "from_json.mdio" # output_path = "test_mdio_from_json.mdio" - + # Step 1: Validate the TEST_SCHEMA JSON with Pydantic dataset = V1Dataset.model_validate(TEST_SCHEMA) - + # Step 2: Convert to MDIO dataset 
using the internal constructor mdio_dataset = _construct_mdio_dataset(dataset) - + # Step 3: Use to_mdio to save the dataset mdio_dataset.to_mdio(store=str(output_path)) - + # Verify the dataset was created assert output_path.exists() - + # Verify we can read it back from mdio.core.v1 import mdio + with mdio.open(str(output_path)) as reader: assert "actual_variable" in reader assert "coord" in reader @@ -338,21 +339,21 @@ def test_json_to_mdio_dataset(self, tmp_path: Path) -> None: def test_mdio_dataset_to_json(self, tmp_path: Path) -> None: """Test converting an MDIO dataset back to JSON (camelCase).""" - from mdio.core.v1._serializer import _construct_mdio_dataset from mdio.core.v1 import mdio - + from mdio.core.v1._serializer import _construct_mdio_dataset + # Step 1: Create MDIO dataset from TEST_SCHEMA dataset = V1Dataset.model_validate(TEST_SCHEMA) mdio_dataset = _construct_mdio_dataset(dataset) - + mdio_path = tmp_path / "test_dataset.mdio" mdio_dataset.to_mdio(store=str(mdio_path)) - + # Step 2: Read back the MDIO dataset with mdio.open(str(mdio_path)) as reader: # Step 3: Extract information to reconstruct Pydantic model variables = [] - + # Add dimension variables for dim_name in ["dim0", "dim1"]: if dim_name in reader.coords: @@ -363,7 +364,7 @@ def test_mdio_dataset_to_json(self, tmp_path: Path) -> None: "dimensions": [{"name": dim_name, "size": reader.dims[dim_name]}], } variables.append(var_dict) - + # Add data variables with their metadata for var_name in reader.data_vars: var = reader[var_name] @@ -372,7 +373,7 @@ def test_mdio_dataset_to_json(self, tmp_path: Path) -> None: "dataType": str(var.dtype), "dimensions": list(var.dims), } - + # Reconstruct metadata based on original TEST_SCHEMA if var_name == "coord": var_dict["metadata"] = { @@ -392,7 +393,7 @@ def test_mdio_dataset_to_json(self, tmp_path: Path) -> None: }, } variables.append(var_dict) - + # Step 4: Create Pydantic model data (camelCase) dataset_data = { "metadata": { @@ -400,13 +401,13 
@@ def test_mdio_dataset_to_json(self, tmp_path: Path) -> None: "apiVersion": reader.attrs.get("apiVersion", "1.0.0"), "createdOn": reader.attrs.get("createdOn", "2023-01-01T00:00:00Z"), }, - "variables": variables + "variables": variables, } - + # Step 5: Validate with Pydantic and serialize to JSON using by_alias=True pydantic_dataset = V1Dataset.model_validate(dataset_data) json_str = pydantic_dataset.model_dump_json(by_alias=True) - + # Verify it's valid JSON and camelCase parsed = json.loads(json_str) @@ -415,34 +416,34 @@ def test_mdio_dataset_to_json(self, tmp_path: Path) -> None: assert "apiVersion" in parsed["metadata"] assert "createdOn" in parsed["metadata"] assert "dataType" in parsed["variables"][0] - + # Verify the conversion preserved data assert pydantic_dataset.metadata.name == "test_dataset" def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None: """Test full round-trip: TEST_SCHEMA JSON -> MDIO -> JSON using to_mdio.""" - from mdio.core.v1._serializer import _construct_mdio_dataset from mdio.core.v1 import mdio - + from mdio.core.v1._serializer import _construct_mdio_dataset + # Step 1: Start with TEST_SCHEMA (input JSON) original_dataset = V1Dataset.model_validate(TEST_SCHEMA) original_json = original_dataset.model_dump_json(by_alias=True) original_parsed = json.loads(original_json) - + # Verify original is camelCase assert "apiVersion" in original_parsed["metadata"] assert "createdOn" in original_parsed["metadata"] - + # Step 2: Convert to MDIO dataset and save mdio_dataset = _construct_mdio_dataset(original_dataset) mdio_path = tmp_path / "round_trip.mdio" mdio_dataset.to_mdio(store=str(mdio_path)) - + # Step 3: Read back from MDIO and convert to JSON with mdio.open(str(mdio_path)) as reader: # Reconstruct the schema structure variables = [] - + # Add dimension variables for dim_name in ["dim0", "dim1"]: if dim_name in reader.coords: @@ -453,7 +454,7 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None: 
"dimensions": [{"name": dim_name, "size": reader.dims[dim_name]}], } variables.append(var_dict) - + # Add coordinate variables that are not dimensions for coord_name, coord in reader.coords.items(): if coord_name not in ["dim0", "dim1"]: # Skip dimension coordinates @@ -462,7 +463,7 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None: "dataType": str(coord.dtype), "dimensions": list(coord.dims), } - + # Add metadata for coord variable from original TEST_SCHEMA if coord_name == "coord": var_dict["metadata"] = { @@ -473,7 +474,7 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None: "unitsV1": {"length": "m"}, } variables.append(var_dict) - + # Add data variables with original metadata for var_name in reader.data_vars: var = reader[var_name] @@ -482,7 +483,7 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None: "dataType": str(var.dtype), "dimensions": list(var.dims), } - + # Add original metadata back from TEST_SCHEMA if var_name == "actual_variable": var_dict["compressor"] = {"name": "blosc", "level": 3} @@ -494,7 +495,7 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None: }, } variables.append(var_dict) - + # Create final dataset final_data = { "metadata": { @@ -502,22 +503,24 @@ def test_full_round_trip_json_mdio_json(self, tmp_path: Path) -> None: "apiVersion": reader.attrs.get("apiVersion", "1.0.0"), "createdOn": reader.attrs.get("createdOn", "2023-01-01T00:00:00Z"), }, - "variables": variables + "variables": variables, } - + final_dataset = V1Dataset.model_validate(final_data) final_json = final_dataset.model_dump_json(by_alias=True) final_parsed = json.loads(final_json) - + # Step 4: Verify round-trip integrity assert final_parsed["metadata"]["name"] == original_parsed["metadata"]["name"] - assert final_parsed["metadata"]["apiVersion"] == original_parsed["metadata"]["apiVersion"] - + assert ( + final_parsed["metadata"]["apiVersion"] == original_parsed["metadata"]["apiVersion"] + 
) + # Verify camelCase is preserved assert "apiVersion" in final_parsed["metadata"] assert "createdOn" in final_parsed["metadata"] assert "dataType" in final_parsed["variables"][0] - + # Verify variable structure is preserved original_var_names = {v["name"] for v in original_parsed["variables"]} final_var_names = {v["name"] for v in final_parsed["variables"]} @@ -543,9 +546,9 @@ def test_invalid_snake_case_json_fails(self) -> None: "data_type": "float32", # snake_case should fail "dimensions": ["dim0"], } - ] + ], } - + # This should fail validation with pytest.raises(ValidationError): V1Dataset.model_validate(invalid_snake_case_schema) @@ -555,20 +558,20 @@ def test_camel_case_serialization_only(self) -> None: dataset = V1Dataset.model_validate(TEST_SCHEMA) json_str = dataset.model_dump_json() parsed = json.loads(json_str) - + # Verify camelCase fields are present assert "apiVersion" in parsed["metadata"] assert "createdOn" in parsed["metadata"] - + # Verify snake_case fields are NOT present assert "api_version" not in parsed["metadata"] assert "created_on" not in parsed["metadata"] - + # Check variables use camelCase for var in parsed["variables"]: assert "dataType" in var assert "data_type" not in var - + # Check nested metadata if present if "metadata" in var and "chunkGrid" in var["metadata"]: assert "chunkGrid" in var["metadata"] diff --git a/uv.lock b/uv.lock index a9b23860..a81fb7b0 100644 --- a/uv.lock +++ b/uv.lock @@ -1811,7 +1811,6 @@ requires-dist = [ { name = "s3fs", marker = "extra == 'cloud'", specifier = "==2024.12.0" }, { name = "segy", specifier = ">=0.4.0,<0.5.0" }, { name = "tqdm", specifier = ">=4.67.0,<5.0.0" }, - { name = "xarray", specifier = ">=2025.3.1" }, { name = "xarray", specifier = ">=2025.4.0" }, { name = "zarr", specifier = ">=3.0.8,<4.0.0" }, { name = "zfpy", marker = "extra == 'lossy'", specifier = ">=1.0.1,<2.0.0" }, From ea827677607024d405cb523b72edca87e06fdd29 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Wed, 28 May 2025 
17:39:44 +0000 Subject: [PATCH 42/55] Fix output not being written to temp path --- tests/unit/test_template_factory.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_template_factory.py b/tests/unit/test_template_factory.py index 6f57eb8c..5dc6ffa0 100644 --- a/tests/unit/test_template_factory.py +++ b/tests/unit/test_template_factory.py @@ -4,6 +4,7 @@ from datetime import UTC from datetime import datetime +from pathlib import Path import pytest from pydantic import ValidationError @@ -21,7 +22,7 @@ from mdio.schemas.dtype import StructuredType -def test_make_toy_dataset() -> None: +def test_make_toy_dataset(tmp_path: Path) -> None: """Test that make_toy_dataset returns a Dataset object using the factory pattern.""" # Create dataset using factory template = SCHEMA_TEMPLATE_MAP[MDIOSchemaType.SEISMIC_3D_POST_STACK_GENERIC] @@ -52,7 +53,8 @@ def test_make_toy_dataset() -> None: print("\nDataset Schema JSON:") print(ds.model_dump_json(indent=2)) - write_mdio_metadata(ds, "test_toy_dataset.mdio") + mdio_path = tmp_path / "test_toy_dataset.mdio" + write_mdio_metadata(ds, str(mdio_path)) # Verify metadata assert ds.metadata.name == "campos_3d" From 3b49dc890dabfda5326c163af5678c15b9485f21 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 29 May 2025 16:22:51 +0000 Subject: [PATCH 43/55] Expand testcases --- tests/unit/schema/v1/test_template_builder.py | 216 +++++++++++++++++- 1 file changed, 212 insertions(+), 4 deletions(-) diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index 80c4dbb4..f17ccb9b 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -31,7 +31,7 @@ def test_dimension_builder_state() -> None: builder = MDIODatasetBuilder("test_dataset") # First dimension should change state to HAS_DIMENSIONS and create a variable - builder = builder.add_dimension("x", 100, long_name="X Dimension") + 
builder.add_dimension("x", 100, long_name="X Dimension") assert builder._state == _BuilderState.HAS_DIMENSIONS assert len(builder._dimensions) == 1 # noqa: PLR2004 assert len(builder._variables) == 1 # noqa: PLR2004 @@ -43,7 +43,7 @@ def test_dimension_builder_state() -> None: assert builder._variables[0].dimensions[0].name == "x" # Adding another dimension should maintain state and create another variable - builder = builder.add_dimension("y", 200, data_type=ScalarType.UINT32) + builder.add_dimension("y", 200, data_type=ScalarType.UINT32) assert builder._state == _BuilderState.HAS_DIMENSIONS assert len(builder._dimensions) == 2 # noqa: PLR2004 assert len(builder._variables) == 2 # noqa: PLR2004 @@ -54,12 +54,12 @@ def test_dimension_builder_state() -> None: assert builder._variables[1].dimensions[0].name == "y" -def test_dimension_with_metadata() -> None: +def test_dimension_with_units() -> None: """Test adding dimensions with custom metadata.""" builder = MDIODatasetBuilder("test_dataset") # Add dimension with custom metadata - builder = builder.add_dimension( + builder.add_dimension( "depth", size=100, data_type=ScalarType.FLOAT32, @@ -73,6 +73,214 @@ def test_dimension_with_metadata() -> None: assert depth_var.metadata.units_v1.length == "m" +def test_dimension_with_attributes() -> None: + """Test adding dimensions with attributes.""" + builder = MDIODatasetBuilder("test_dataset") + + # Add dimension with attributes + builder.add_dimension( + "depth", + size=100, + data_type=ScalarType.FLOAT32, + metadata={"attributes": {"MGA": 51}}, + ) + + assert len(builder._variables) == 1 + depth_var = builder._variables[0] + assert depth_var.name == "depth" + assert depth_var.data_type == ScalarType.FLOAT32 + assert depth_var.metadata.attributes["MGA"] == 51 + + +def test_dimension_with_chunk_grid() -> None: + """Test adding dimensions with chunk grid.""" + builder = MDIODatasetBuilder("test_dataset") + + # Add dimension with chunk grid + builder.add_dimension( + "depth", 
+ size=100, + data_type=ScalarType.FLOAT32, + metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [20]}}}, + ) + + assert len(builder._variables) == 1 + depth_var = builder._variables[0] + assert depth_var.name == "depth" + assert depth_var.data_type == ScalarType.FLOAT32 + assert depth_var.metadata.chunk_grid.name == "regular" + assert depth_var.metadata.chunk_grid.configuration.chunk_shape == [20] + + +def test_dimension_with_stats() -> None: + """Test adding dimensions with stats.""" + builder = MDIODatasetBuilder("test_dataset") + + # Add dimension with stats + builder.add_dimension( + "depth", + size=100, + data_type=ScalarType.FLOAT32, + metadata={ + "statsV1": { + "count": 100, + "sum": 1215.1, + "sumSquares": 125.12, + "min": 5.61, + "max": 10.84, + "histogram": {"binCenters": [1, 2], "counts": [10, 15]}, + } + }, + ) + + assert len(builder._variables) == 1 + depth_var = builder._variables[0] + assert depth_var.name == "depth" + assert depth_var.data_type == ScalarType.FLOAT32 + assert depth_var.metadata.stats_v1.count == 100 + assert depth_var.metadata.stats_v1.sum == 1215.1 + + +def test_dimension_with_full_metadata() -> None: + """Test adding dimensions with all metadata.""" + builder = MDIODatasetBuilder("test_dataset") + + # Add dimension with all metadata + builder.add_dimension( + "depth", + size=100, + data_type=ScalarType.FLOAT32, + metadata={ + "unitsV1": {"length": "m"}, + "attributes": {"MGA": 51}, + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [20]}}, + "statsV1": { + "count": 100, + "sum": 1215.1, + "sumSquares": 125.12, + "min": 5.61, + "max": 10.84, + "histogram": {"binCenters": [1, 2], "counts": [10, 15]}, + }, + }, + ) + + assert len(builder._variables) == 1 + depth_var = builder._variables[0] + assert depth_var.name == "depth" + assert depth_var.data_type == ScalarType.FLOAT32 + assert depth_var.metadata.units_v1.length == "m" + assert depth_var.metadata.attributes["MGA"] == 51 + assert 
depth_var.metadata.chunk_grid.name == "regular" + assert depth_var.metadata.chunk_grid.configuration.chunk_shape == [20] + assert depth_var.metadata.stats_v1.count == 100 + assert depth_var.metadata.stats_v1.sum == 1215.1 + assert depth_var.metadata.stats_v1.sum_squares == 125.12 + assert depth_var.metadata.stats_v1.min == 5.61 + assert depth_var.metadata.stats_v1.max == 10.84 + assert depth_var.metadata.stats_v1.histogram.bin_centers == [1, 2] + assert depth_var.metadata.stats_v1.histogram.counts == [10, 15] + + j = builder.build().json() + print(j) + + +def test_coordiante_with_units() -> None: + """Test adding coordinates with units.""" + builder = MDIODatasetBuilder("test_dataset") + builder.add_dimension("inline", 100) + builder.add_dimension("crossline", 100) + + # Add coordinate with units + builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}}) + + assert len(builder._variables) == 2 + assert len(builder._coordinates) == 1 + cdp_var = builder._coordinates[0] + assert cdp_var.name == "cdp" + assert cdp_var.data_type == ScalarType.FLOAT32 + assert cdp_var.metadata.units_v1.length == "m" + +def test_coordinate_with_attributes() -> None: + """Test adding coordinates with attributes.""" + builder = MDIODatasetBuilder("test_dataset") + builder.add_dimension("inline", 100) + builder.add_dimension("crossline", 100) + + # Add coordinate with attributes + builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"attributes": {"MGA": 51}}) + + assert len(builder._variables) == 2 + assert len(builder._coordinates) == 1 + cdp_var = builder._coordinates[0] + assert cdp_var.name == "cdp" + assert cdp_var.data_type == ScalarType.FLOAT32 + assert cdp_var.metadata.attributes["MGA"] == 51 + +def test_coordinate_with_chunk_grid() -> None: + """Test adding coordinates with chunk grid.""" + builder = MDIODatasetBuilder("test_dataset") + builder.add_dimension("inline", 100) + builder.add_dimension("crossline", 
100) + + # Add coordinate with chunk grid + builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [20, 20]}}}) + + assert len(builder._variables) == 2 + assert len(builder._coordinates) == 1 + cdp_var = builder._coordinates[0] + assert cdp_var.name == "cdp" + assert cdp_var.data_type == ScalarType.FLOAT32 + assert cdp_var.metadata.chunk_grid.name == "regular" + assert cdp_var.metadata.chunk_grid.configuration.chunk_shape == [20, 20] + +def test_coordinate_with_stats() -> None: + """Test adding coordinates with stats.""" + builder = MDIODatasetBuilder("test_dataset") + builder.add_dimension("inline", 100) + builder.add_dimension("crossline", 100) + + # Add coordinate with stats + builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"statsV1": {"count": 100, "sum": 1215.1, "sumSquares": 125.12, "min": 5.61, "max": 10.84, "histogram": {"binCenters": [1, 2], "counts": [10, 15]}}}) + + assert len(builder._variables) == 2 + assert len(builder._coordinates) == 1 + cdp_var = builder._coordinates[0] + assert cdp_var.name == "cdp" + assert cdp_var.data_type == ScalarType.FLOAT32 + assert cdp_var.metadata.stats_v1.count == 100 + assert cdp_var.metadata.stats_v1.sum == 1215.1 + +def test_coordinate_with_full_metadata() -> None: + """Test adding coordinates with all metadata.""" + builder = MDIODatasetBuilder("test_dataset") + builder.add_dimension("inline", 100) + builder.add_dimension("crossline", 100) + + # Add coordinate with all metadata + builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}, "attributes": {"MGA": 51}, "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [20]}}, "statsV1": {"count": 100, "sum": 1215.1, "sumSquares": 125.12, "min": 5.61, "max": 10.84, "histogram": {"binCenters": [1, 2], "counts": [10, 15]}}}) + + assert len(builder._variables) == 2 + assert len(builder._coordinates) == 1 
+ cdp_var = builder._coordinates[0] + assert cdp_var.name == "cdp" + assert cdp_var.data_type == ScalarType.FLOAT32 + assert cdp_var.metadata.units_v1.length == "m" + assert cdp_var.metadata.attributes["MGA"] == 51 + assert cdp_var.metadata.chunk_grid.name == "regular" + assert cdp_var.metadata.chunk_grid.configuration.chunk_shape == [20] + assert cdp_var.metadata.stats_v1.count == 100 + assert cdp_var.metadata.stats_v1.sum == 1215.1 + assert cdp_var.metadata.stats_v1.sum_squares == 125.12 + assert cdp_var.metadata.stats_v1.min == 5.61 + assert cdp_var.metadata.stats_v1.max == 10.84 + assert cdp_var.metadata.stats_v1.histogram.bin_centers == [1, 2] + assert cdp_var.metadata.stats_v1.histogram.counts == [10, 15] + + j = builder.build().json() + print(j) + + def test_coordinate_builder_state() -> None: """Test coordinate builder state transitions and functionality.""" builder = MDIODatasetBuilder("test_dataset") From f064ec6aad89b09c7e90b814f4fd69f8e3a9cfc6 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 14:05:10 +0000 Subject: [PATCH 44/55] Adds json dump overload --- src/mdio/schemas/core.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mdio/schemas/core.py b/src/mdio/schemas/core.py index c61fd60b..7768be06 100644 --- a/src/mdio/schemas/core.py +++ b/src/mdio/schemas/core.py @@ -66,3 +66,9 @@ def model_dump_json(self, *args, **kwargs) -> dict: # noqa: ANN201 ANN001 ANN00 if "exclude_none" not in kwargs: kwargs["exclude_none"] = True return super().model_dump_json(*args, **kwargs) + + def json(self, *args, **kwargs) -> dict: # noqa: ANN201 ANN001 ANN002 ANN003 + """Dump JSON using camelCase aliases and excluding None values by default.""" + if "by_alias" not in kwargs: + kwargs["by_alias"] = True + return self.model_dump_json(*args, **kwargs) From cfe234c1911fef01a68c4c988af1c60e7810120f Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 14:06:01 +0000 Subject: [PATCH 45/55] Fix metadata serialization --- 
src/mdio/core/v1/_serializer.py | 49 ++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index c6cc8146..f5956bab 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -44,17 +44,41 @@ def make_coordinate( dimensions: list[NamedDimension | str], data_type: ScalarType | StructuredType, long_name: str = None, - metadata: list[AllUnits | UserAttributes] | None = None, + metadata: list[AllUnits | UserAttributes] | dict[str, Any] | None = None, ) -> Coordinate: """Create a Coordinate with the given name, dimensions, data_type, and metadata.""" - coordinate_dict = { - "name": name, - "longName": long_name, - "dimensions": dimensions, - "dataType": data_type, - "metadata": metadata, - } - return Coordinate(**coordinate_dict) + # Build metadata list of AllUnits or UserAttributes to satisfy Coordinate.schema + coord_meta_list: list[AllUnits | UserAttributes] | None = None + if metadata is not None: + items: list[AllUnits | UserAttributes] = [] + # single dict input + if isinstance(metadata, dict): + if "unitsV1" in metadata: + items.append(AllUnits(**{"unitsV1": metadata["unitsV1"]})) + if "attributes" in metadata: + items.append(UserAttributes(**{"attributes": metadata["attributes"]})) + # list input may contain dict or model instances + elif isinstance(metadata, list): + for md in metadata: + if isinstance(md, AllUnits) or isinstance(md, UserAttributes): + items.append(md) + elif isinstance(md, dict): + if "unitsV1" in md: + items.append(AllUnits(**{"unitsV1": md["unitsV1"]})) + if "attributes" in md: + items.append(UserAttributes(**{"attributes": md["attributes"]})) + else: + raise TypeError(f"Unsupported metadata element type for coordinate: {type(md)}") + else: + raise TypeError(f"Unsupported metadata type for coordinate: {type(metadata)}") + coord_meta_list = items or None + return Coordinate( + name=name, + longName=long_name, 
+ dimensions=dimensions, + dataType=data_type, + metadata=coord_meta_list, + ) def make_variable( # noqa: PLR0913 PLR0912 @@ -115,7 +139,12 @@ def _to_serializable(val: object) -> dict[str, Any] | object: var_metadata = VariableMetadata(**converted_dict) elif isinstance(metadata, VariableMetadata): - var_metadata = metadata + # Flatten any single-element list fields in metadata + md = metadata.model_dump(by_alias=True, exclude_none=True) + for key, value in list(md.items()): + if isinstance(value, list) and len(value) == 1: + md[key] = value[0] + var_metadata = VariableMetadata(**md) else: msg = f"Unsupported metadata type: {type(metadata)}" From 8a460ba04488af8644f49a48901b8a11f50a216e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 14:06:44 +0000 Subject: [PATCH 46/55] Add convenience to_mdio function to write metadata --- src/mdio/core/v1/builder.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/mdio/core/v1/builder.py b/src/mdio/core/v1/builder.py index 124f4e18..8ffef60f 100644 --- a/src/mdio/core/v1/builder.py +++ b/src/mdio/core/v1/builder.py @@ -221,10 +221,26 @@ def build(self) -> Dataset: return make_dataset(all_variables, metadata) + def to_mdio( + self, + store: str, + mode: str = "w", + compute: bool = False, + **kwargs: Mapping[str, str | int | float | bool], + ) -> Dataset: + """Write the dataset to a Zarr store and return the constructed mdio.Dataset. + + This function constructs an mdio.Dataset from the MDIO dataset and writes its metadata + to a Zarr store. The actual data is not written, only the metadata structure is created. + """ + return write_mdio_metadata(self.build(), store, mode, compute, **kwargs) + def write_mdio_metadata( mdio_ds: Dataset, store: str, + mode: str = "w", + compute: bool = False, **kwargs: Mapping[str, str | int | float | bool], ) -> mdio.Dataset: """Write MDIO metadata to a Zarr store and return the constructed mdio.Dataset. 
@@ -234,7 +250,9 @@ def write_mdio_metadata( Args: mdio_ds: The MDIO dataset to serialize - store: Path to the Zarr store + store: Path to the Zarr or .mdio store + mode: Write mode to pass to to_mdio(), e.g. 'w' or 'a' + compute: Whether to compute (write) array chunks (True) or only metadata (False) **kwargs: Additional arguments to pass to to_mdio() Returns: @@ -270,11 +288,11 @@ def _generate_encodings() -> dict: ds.to_mdio( store, - mode="w", + mode=mode, zarr_format=2, consolidated=True, safe_chunks=False, - compute=False, + compute=compute, encoding=_generate_encodings(), **kwargs, ) From 70feb35bb5e5368c53d5693108bc0a1e7bc2258e Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 14:06:53 +0000 Subject: [PATCH 47/55] Update tests --- tests/unit/schema/v1/test_template_builder.py | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index f17ccb9b..0211302e 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -234,22 +234,6 @@ def test_coordinate_with_chunk_grid() -> None: assert cdp_var.metadata.chunk_grid.name == "regular" assert cdp_var.metadata.chunk_grid.configuration.chunk_shape == [20, 20] -def test_coordinate_with_stats() -> None: - """Test adding coordinates with stats.""" - builder = MDIODatasetBuilder("test_dataset") - builder.add_dimension("inline", 100) - builder.add_dimension("crossline", 100) - - # Add coordinate with stats - builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"statsV1": {"count": 100, "sum": 1215.1, "sumSquares": 125.12, "min": 5.61, "max": 10.84, "histogram": {"binCenters": [1, 2], "counts": [10, 15]}}}) - - assert len(builder._variables) == 2 - assert len(builder._coordinates) == 1 - cdp_var = builder._coordinates[0] - assert cdp_var.name == "cdp" - assert cdp_var.data_type == ScalarType.FLOAT32 - 
assert cdp_var.metadata.stats_v1.count == 100 - assert cdp_var.metadata.stats_v1.sum == 1215.1 def test_coordinate_with_full_metadata() -> None: """Test adding coordinates with all metadata.""" @@ -258,7 +242,7 @@ def test_coordinate_with_full_metadata() -> None: builder.add_dimension("crossline", 100) # Add coordinate with all metadata - builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}, "attributes": {"MGA": 51}, "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [20]}}, "statsV1": {"count": 100, "sum": 1215.1, "sumSquares": 125.12, "min": 5.61, "max": 10.84, "histogram": {"binCenters": [1, 2], "counts": [10, 15]}}}) + builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}, "attributes": {"MGA": 51}, "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [20]}}}) assert len(builder._variables) == 2 assert len(builder._coordinates) == 1 @@ -269,13 +253,6 @@ def test_coordinate_with_full_metadata() -> None: assert cdp_var.metadata.attributes["MGA"] == 51 assert cdp_var.metadata.chunk_grid.name == "regular" assert cdp_var.metadata.chunk_grid.configuration.chunk_shape == [20] - assert cdp_var.metadata.stats_v1.count == 100 - assert cdp_var.metadata.stats_v1.sum == 1215.1 - assert cdp_var.metadata.stats_v1.sum_squares == 125.12 - assert cdp_var.metadata.stats_v1.min == 5.61 - assert cdp_var.metadata.stats_v1.max == 10.84 - assert cdp_var.metadata.stats_v1.histogram.bin_centers == [1, 2] - assert cdp_var.metadata.stats_v1.histogram.counts == [10, 15] j = builder.build().json() print(j) From fe2c84bbe4a535c19752effef1d66c0795fa09e2 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 14:07:06 +0000 Subject: [PATCH 48/55] Begin documentation --- .gitignore | 3 + docs/tutorials/builder.ipynb | 847 +++++++++++++++++++++++++++++++++++ docs/tutorials/builder.md | 146 ++++++ 3 files changed, 996 insertions(+) create mode 100644 
docs/tutorials/builder.ipynb create mode 100644 docs/tutorials/builder.md diff --git a/.gitignore b/.gitignore index bfdc38f2..e77a2883 100644 --- a/.gitignore +++ b/.gitignore @@ -153,3 +153,6 @@ mdio1/* pytest-of-* tmp debugging/* + +# Docs +docs/tutorials/output/* diff --git a/docs/tutorials/builder.ipynb b/docs/tutorials/builder.ipynb new file mode 100644 index 00000000..be2483fa --- /dev/null +++ b/docs/tutorials/builder.ipynb @@ -0,0 +1,847 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9fd6d920", + "metadata": {}, + "source": [ + "# Constructing a v1 Dataset with the MDIODatasetBuilder\n", + "\n", + "In this notebook, we demonstrate how to use the `MDIODatasetBuilder` to build and write a post-stack depth-migrated (PSDM) seismic dataset using the MDIO v1 schema." + ] + }, + { + "cell_type": "markdown", + "id": "1240095a", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c00c220", + "metadata": {}, + "outputs": [], + "source": [ + "from mdio.core.v1.builder import MDIODatasetBuilder, write_mdio_metadata\n", + "from mdio.schemas.dtype import ScalarType, StructuredType\n", + "from mdio.schemas.compressors import Blosc, ZFP\n", + "\n", + "# Auxiliary import for formatting and pretty printing\n", + "from rich import print as rprint\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "id": "a9432bdc", + "metadata": {}, + "source": [ + "## 1. Create Builder and Add Dimensions\n", + "First, instantiate a builder instance with a name and optional global attributes. The builder provides a chainable interface to construct bespoke Dataset contracts that may not exist in the factory.\n", + "\n", + "Attributes are free-form and intended to describe the overall dataset, data provenance, processing steps, or any other information that would enrich the Dataset."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35505bee", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize builder for PSDM stack\n", + "builder = MDIODatasetBuilder(\n", + " name=\"psdm_stack_example\",\n", + " attributes={ 'description': 'Example PSDM stack' }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1befa778", + "metadata": {}, + "source": [ + "## 2. Add Dimensions\n", + "\n", + "The Dimensions represent the core grid of the Dataset.\n", + "\n", + "They are one-dimensional tick-labels which may be populated with values for value-based and index-based access to the Dataset or inert for index-based access to the Dataset.\n", + "\n", + "It is generally recommended to fully populate the dimensions, but doing so is beyond the scope of this example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd9df8ca", + "metadata": {}, + "outputs": [], + "source": [ + "# Add core dimensions: inline, crossline, depth\n", + "builder.add_dimension('inline', 256, long_name='Inline Number')\\\n", + " .add_dimension('crossline', 512, long_name='Crossline Number')\\\n", + " .add_dimension('depth', 384, long_name='Depth Sample')" + ] + }, + { + "cell_type": "markdown", + "id": "4ac0a62e", + "metadata": {}, + "source": [ + "## 3. Add CDP Coordinates (UTM Easting/Northing)\n", + "\n", + "Coordinates are N-dimensional arrays which enrich the dataset by providing auxiliary coordinate systems.\n", + "\n", + "In this example, our Dataset contract shows that we expect that our inline and crossline indices can be translated into real-world coordinate values in Map Grid of Australia [Zone 51](https://epsg.io/28351)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d2da0c3", + "metadata": {}, + "outputs": [ + { + "ename": "ValidationError", + "evalue": "2 validation errors for Coordinate\nmetadata.0.AllUnits.attributes\n Extra inputs are not permitted [type=extra_forbidden, input_value={'MGA': 51}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden\nmetadata.0.UserAttributes.unitsV1\n Extra inputs are not permitted [type=extra_forbidden, input_value={'length': 'm'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mValidationError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# CDP X and Y on inline-crossline grid\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43madd_coordinate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mcdp_x\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[43m \u001b[49m\u001b[43mdimensions\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43minline\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mcrossline\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mCDP X (UTM Easting)\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m 
\u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mScalarType\u001b[49m\u001b[43m.\u001b[49m\u001b[43mFLOAT64\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43munitsV1\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mlength\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mm\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mattributes\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mMGA\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m51\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[43m)\u001b[49m.add_coordinate(\n\u001b[32m 9\u001b[39m name=\u001b[33m'\u001b[39m\u001b[33mcdp_y\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 10\u001b[39m dimensions=[\u001b[33m'\u001b[39m\u001b[33minline\u001b[39m\u001b[33m'\u001b[39m,\u001b[33m'\u001b[39m\u001b[33mcrossline\u001b[39m\u001b[33m'\u001b[39m],\n\u001b[32m 11\u001b[39m long_name=\u001b[33m'\u001b[39m\u001b[33mCDP Y (UTM Northing)\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 12\u001b[39m data_type=ScalarType.FLOAT64,\n\u001b[32m 13\u001b[39m metadata={\n\u001b[32m 14\u001b[39m \u001b[33m'\u001b[39m\u001b[33munitsV1\u001b[39m\u001b[33m'\u001b[39m: {\u001b[33m'\u001b[39m\u001b[33mlength\u001b[39m\u001b[33m'\u001b[39m: \u001b[33m'\u001b[39m\u001b[33mm\u001b[39m\u001b[33m'\u001b[39m},\n\u001b[32m 15\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mattributes\u001b[39m\u001b[33m\"\u001b[39m: 
{\u001b[33m\"\u001b[39m\u001b[33mMGA\u001b[39m\u001b[33m\"\u001b[39m: \u001b[32m51\u001b[39m}\n\u001b[32m 16\u001b[39m },\n\u001b[32m 17\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/src/mdio/core/v1/builder.py:139\u001b[39m, in \u001b[36mMDIODatasetBuilder.add_coordinate\u001b[39m\u001b[34m(self, name, long_name, dimensions, data_type, metadata)\u001b[39m\n\u001b[32m 135\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 136\u001b[39m dim_objects.append(dim)\n\u001b[32m 138\u001b[39m \u001b[38;5;28mself\u001b[39m._coordinates.append(\n\u001b[32m--> \u001b[39m\u001b[32m139\u001b[39m \u001b[43mmake_coordinate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 140\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 141\u001b[39m \u001b[43m \u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 142\u001b[39m \u001b[43m \u001b[49m\u001b[43mdimensions\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdim_objects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 143\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 144\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 145\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 146\u001b[39m )\n\u001b[32m 147\u001b[39m \u001b[38;5;28mself\u001b[39m._state = _BuilderState.HAS_COORDINATES\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/src/mdio/core/v1/_serializer.py:57\u001b[39m, in \u001b[36mmake_coordinate\u001b[39m\u001b[34m(name, dimensions, data_type, long_name, metadata)\u001b[39m\n\u001b[32m 49\u001b[39m 
\u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Create a Coordinate with the given name, dimensions, data_type, and metadata.\"\"\"\u001b[39;00m\n\u001b[32m 50\u001b[39m coordinate_dict = {\n\u001b[32m 51\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mname\u001b[39m\u001b[33m\"\u001b[39m: name,\n\u001b[32m 52\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mlongName\u001b[39m\u001b[33m\"\u001b[39m: long_name,\n\u001b[32m (...)\u001b[39m\u001b[32m 55\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mmetadata\u001b[39m\u001b[33m\"\u001b[39m: metadata,\n\u001b[32m 56\u001b[39m }\n\u001b[32m---> \u001b[39m\u001b[32m57\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mCoordinate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mcoordinate_dict\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/.venv/lib/python3.12/site-packages/pydantic/main.py:212\u001b[39m, in \u001b[36mBaseModel.__init__\u001b[39m\u001b[34m(self, **data)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;66;03m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[32m 211\u001b[39m __tracebackhide__ = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m212\u001b[39m validated_self = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mself_instance\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 213\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m validated_self:\n\u001b[32m 214\u001b[39m warnings.warn(\n\u001b[32m 215\u001b[39m \u001b[33m'\u001b[39m\u001b[33mA custom validator is returning a value other than 
`self`.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m'\u001b[39m\n\u001b[32m 216\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mReturning anything other than `self` from a top level model validator isn\u001b[39m\u001b[33m'\u001b[39m\u001b[33mt supported when validating via `__init__`.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 217\u001b[39m \u001b[33m'\u001b[39m\u001b[33mSee the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 218\u001b[39m category=\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 219\u001b[39m )\n", + "\u001b[31mValidationError\u001b[39m: 2 validation errors for Coordinate\nmetadata.0.AllUnits.attributes\n Extra inputs are not permitted [type=extra_forbidden, input_value={'MGA': 51}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden\nmetadata.0.UserAttributes.unitsV1\n Extra inputs are not permitted [type=extra_forbidden, input_value={'length': 'm'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden" + ] + } + ], + "source": [ + "# CDP X and Y on inline-crossline grid\n", + "builder.add_coordinate(\n", + " name='cdp_x',\n", + " dimensions=['inline','crossline'],\n", + " long_name='CDP X (UTM Easting)',\n", + " data_type=ScalarType.FLOAT64,\n", + " metadata={\n", + " 'unitsV1': {'length': 'm'}, \n", + " \"attributes\": {\"MGA\": 51}\n", + " },\n", + ").add_coordinate(\n", + " name='cdp_y',\n", + " dimensions=['inline','crossline'],\n", + " long_name='CDP Y (UTM Northing)',\n", + " data_type=ScalarType.FLOAT64,\n", + " metadata={\n", + " 'unitsV1': {'length': 'm'},\n", + " \"attributes\": {\"MGA\": 51}\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "45954756", + "metadata": {}, + "source": [ + "## 3. 
Add Post-Stack Amplitude Volume Variable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b4c8aa7", + "metadata": {}, + "outputs": [], + "source": [ + "builder.add_variable(\n", + " name='stack_amplitude',\n", + " dimensions=['inline','crossline','depth'],\n", + " data_type=ScalarType.FLOAT32,\n", + " compressor=Blosc(algorithm='zstd', level=3),\n", + " coordinates=['inline','crossline','cdp_x','cdp_y'],\n", + " metadata={\n", + " 'chunkGrid': {'name': 'regular', 'configuration': {'chunkShape': [64, 64, 64]}}\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0ed7500f", + "metadata": {}, + "source": [ + "## 4. Build and Write" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d7df200f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/workspaces/mdio-python/src/mdio/core/v1/_overloads.py:32: FutureWarning: zarr_version is deprecated, use zarr_format\n", + " return super().to_zarr(*args, store=store, **kwargs)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.MDIODataset> Size: 203MB\n",
+              "Dimensions:          (inline: 256, crossline: 512, depth: 384)\n",
+              "Coordinates:\n",
+              "  * inline           (inline) int32 1kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n",
+              "  * crossline        (crossline) int32 2kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0\n",
+              "  * depth            (depth) int32 2kB 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n",
+              "Data variables:\n",
+              "    stack_amplitude  (inline, crossline, depth) float32 201MB 0.0 0.0 ... 0.0\n",
+              "    cdp_x            (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n",
+              "    cdp_y            (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n",
+              "Attributes:\n",
+              "    apiVersion:  1.0.0\n",
+              "    createdOn:   2025-05-29 14:18:21.113904+00:00\n",
+              "    name:        psdm_stack_example\n",
+              "    attributes:  {'description': 'Example PSDM stack'}
" + ], + "text/plain": [ + " Size: 203MB\n", + "Dimensions: (inline: 256, crossline: 512, depth: 384)\n", + "Coordinates:\n", + " * inline (inline) int32 1kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n", + " * crossline (crossline) int32 2kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0\n", + " * depth (depth) int32 2kB 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n", + "Data variables:\n", + " stack_amplitude (inline, crossline, depth) float32 201MB 0.0 0.0 ... 0.0\n", + " cdp_x (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n", + " cdp_y (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n", + "Attributes:\n", + " apiVersion: 1.0.0\n", + " createdOn: 2025-05-29 14:18:21.113904+00:00\n", + " name: psdm_stack_example\n", + " attributes: {'description': 'Example PSDM stack'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Write only metadata to .mdio store and build the interactable Dataset object\n", + "ds = builder.to_mdio(store='output/psdm_stack_example.mdio')\n", + "\n", + "# Display the interactable Dataset\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "9efbeb0b", + "metadata": {}, + "source": [ + "# Build and view the Dataset contract" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "bbcca480", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
{\n",
+              "    'metadata': {\n",
+              "        'name': 'psdm_stack_example',\n",
+              "        'apiVersion': '1.0.0',\n",
+              "        'createdOn': '2025-05-29T14:18:21.113904Z',\n",
+              "        'attributes': {'description': 'Example PSDM stack'}\n",
+              "    },\n",
+              "    'variables': [\n",
+              "        {\n",
+              "            'dataType': 'int32',\n",
+              "            'dimensions': [{'name': 'inline', 'size': 256}],\n",
+              "            'name': 'inline',\n",
+              "            'longName': 'Inline Number'\n",
+              "        },\n",
+              "        {\n",
+              "            'dataType': 'int32',\n",
+              "            'dimensions': [{'name': 'crossline', 'size': 512}],\n",
+              "            'name': 'crossline',\n",
+              "            'longName': 'Crossline Number'\n",
+              "        },\n",
+              "        {\n",
+              "            'dataType': 'int32',\n",
+              "            'dimensions': [{'name': 'depth', 'size': 384}],\n",
+              "            'name': 'depth',\n",
+              "            'longName': 'Depth Sample'\n",
+              "        },\n",
+              "        {\n",
+              "            'dataType': 'float32',\n",
+              "            'dimensions': [\n",
+              "                {'name': 'inline', 'size': 256},\n",
+              "                {'name': 'crossline', 'size': 512},\n",
+              "                {'name': 'depth', 'size': 384}\n",
+              "            ],\n",
+              "            'compressor': {'name': 'blosc', 'algorithm': 'zstd', 'level': 3, 'shuffle': 1, 'blocksize': 0},\n",
+              "            'name': 'stack_amplitude',\n",
+              "            'coordinates': ['inline', 'crossline', 'cdp_x', 'cdp_y'],\n",
+              "            'metadata': {'chunkGrid': {'name': 'regular', 'configuration': {'chunkShape': [64, 64, 64]}}}\n",
+              "        },\n",
+              "        {\n",
+              "            'dataType': 'float64',\n",
+              "            'dimensions': [{'name': 'inline', 'size': 256}, {'name': 'crossline', 'size': 512}],\n",
+              "            'name': 'cdp_x',\n",
+              "            'longName': 'CDP X (UTM Easting)',\n",
+              "            'metadata': {'unitsV1': {'length': 'm'}}\n",
+              "        },\n",
+              "        {\n",
+              "            'dataType': 'float64',\n",
+              "            'dimensions': [{'name': 'inline', 'size': 256}, {'name': 'crossline', 'size': 512}],\n",
+              "            'name': 'cdp_y',\n",
+              "            'longName': 'CDP Y (UTM Northing)',\n",
+              "            'metadata': {'unitsV1': {'length': 'm'}}\n",
+              "        }\n",
+              "    ]\n",
+              "}\n",
+              "
\n" + ], + "text/plain": [ + "\u001b[1m{\u001b[0m\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'psdm_stack_example'\u001b[0m,\n", + " \u001b[32m'apiVersion'\u001b[0m: \u001b[32m'1.0.0'\u001b[0m,\n", + " \u001b[32m'createdOn'\u001b[0m: \u001b[32m'2025-05-29T14:18:21.113904Z'\u001b[0m,\n", + " \u001b[32m'attributes'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Example PSDM stack'\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[32m'variables'\u001b[0m: \u001b[1m[\u001b[0m\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'int32'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: \u001b[32m'Inline Number'\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'int32'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: \u001b[32m'Crossline Number'\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'int32'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'depth'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m384\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'depth'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: 
\u001b[32m'Depth Sample'\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float32'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\n", + " \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'depth'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m384\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m]\u001b[0m,\n", + " \u001b[32m'compressor'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'blosc'\u001b[0m, \u001b[32m'algorithm'\u001b[0m: \u001b[32m'zstd'\u001b[0m, \u001b[32m'level'\u001b[0m: \u001b[1;36m3\u001b[0m, \u001b[32m'shuffle'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'blocksize'\u001b[0m: \u001b[1;36m0\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'stack_amplitude'\u001b[0m,\n", + " \u001b[32m'coordinates'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'inline'\u001b[0m, \u001b[32m'crossline'\u001b[0m, \u001b[32m'cdp_x'\u001b[0m, \u001b[32m'cdp_y'\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'chunkGrid'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'regular'\u001b[0m, \u001b[32m'configuration'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'chunkShape'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1;36m64\u001b[0m, \u001b[1;36m64\u001b[0m, \u001b[1;36m64\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float64'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: 
\u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'cdp_x'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: \u001b[32m'CDP X \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUTM Easting\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m,\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: \u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float64'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'cdp_y'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: \u001b[32m'CDP Y \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUTM Northing\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m,\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: \u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m}\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + "\u001b[1m}\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Build our Dataset model from the builder\n", + "dataset = builder.build()\n", + "\n", + "# Serialize the Dataset model to JSON\n", + "contract = json.loads(dataset.json())\n", + "\n", + "# Reorder the contract so that 
metadata is displayed first\n", + "ordered_contract = {\n", + " \"metadata\": contract[\"metadata\"],\n", + " \"variables\": contract[\"variables\"],\n", + "}\n", + "\n", + "rprint(ordered_contract)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (mdio-dev)", + "language": "python", + "name": "mdio-dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/tutorials/builder.md b/docs/tutorials/builder.md new file mode 100644 index 00000000..f41f68a4 --- /dev/null +++ b/docs/tutorials/builder.md @@ -0,0 +1,146 @@ +# Constructing a v1 Dataset with the MDIODatasetBuilder + +In this tutorial, we'll walk through how to use the `MDIODatasetBuilder` class to programmatically construct an MDIO v1 dataset. The builder enforces a specific build order to ensure a valid dataset: + +1. Add dimensions via `add_dimension()` +2. (Optional) Add coordinates via `add_coordinate()` +3. Add variables via `add_variable()` +4. Call `build()` to finalize the dataset. + +## Importing the Builder + +```python +from mdio.core.v1.builder import MDIODatasetBuilder, write_mdio_metadata +from mdio.schemas.dtype import ScalarType, StructuredType +from mdio.schemas.compressors import Blosc, ZFP +``` + +## Creating the Builder + +First, create a builder instance with a name and optional global attributes: + +```python +builder = MDIODatasetBuilder( + name="example_dataset", + attributes={ + "description": "An example MDIO v1 dataset", + "creator": "Your Name", + }, +) +``` + +## Adding Dimensions + +Dimensions define the axes of your dataset. 
You must add at least one dimension before adding coordinates or variables: + +```python +builder = ( + builder + .add_dimension(name="inline", size=256, long_name="Inline Number") + .add_dimension(name="crossline", size=512, long_name="Crossline Number") + .add_dimension(name="depth", size=384, long_name="Depth Sample") +) +``` + +## Adding Coordinates (Optional) + +Coordinates map grid indices to real-world positions (e.g., UTM coordinates on the inline–crossline plane): + +```python +builder = ( + builder + .add_coordinate( + name="cdp_x", + dimensions=["inline", "crossline"], + long_name="CDP X (UTM Easting)", + data_type=ScalarType.FLOAT64, + metadata={"unitsV1": {"length": "m"}}, + ) + .add_coordinate( + name="cdp_y", + dimensions=["inline", "crossline"], + long_name="CDP Y (UTM Northing)", + data_type=ScalarType.FLOAT64, + metadata={"unitsV1": {"length": "m"}}, + ) +) +``` + +If you omit `name`, the builder auto-generates names like `coord_0`. If you omit `dimensions`, it uses all defined dimensions. + +## Adding Variables + +Add one or more seismic data variables (e.g., post-stack amplitude volumes). 
Variables can have compressors, statistics, and more: + +```python +builder = builder.add_variable( + name="stack_amplitude", + dimensions=["inline", "crossline", "depth"], + data_type=ScalarType.FLOAT32, + compressor=Blosc(algorithm="zstd", level=3), + coordinates=["inline", "crossline", "cdp_x", "cdp_y"], + metadata={ + "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [64, 64, 64]}} + }, +) +``` + +For structured dtypes, use `StructuredType`: + +```python +from mdio.schemas.dtype import StructuredType, ScalarType + +structured_dtype = StructuredType( + fields=[ + {"name": "flag", "format": ScalarType.INT8}, + {"name": "value", "format": ScalarType.FLOAT32}, + ] +) + +builder = builder.add_variable( + name="metadata", + dimensions=["x", "y"], + data_type=structured_dtype, +) +``` + +## Building the Dataset + +After adding all components, call: + +```python +dataset = builder.build() +``` + +This returns a `Dataset` object conforming to the MDIO v1 schema. + +## Writing Metadata and Writing Data + +The `.build()` method returns an in-memory Pydantic `Dataset` model (MDIO v1 schema). To serialize this model to disk, use the following approaches: + +- **Metadata only** (no array values written): + + ```python + # Write metadata structure only (no data arrays) + mds = write_mdio_metadata( + dataset, + store="path/to/output.mdio" + ) + ``` + + This writes only the metadata to the `.mdio` store and returns an `mdio.Dataset` (an xarray.Dataset subclass) with placeholder arrays. 
+ +- **Write actual data** (array values): + + After writing metadata, call `to_mdio()` on the returned `mdio.Dataset` with `compute=True` to write the actual data arrays: + + ```python + # Write data arrays into the existing store + mds.to_mdio( + store="path/to/output.mdio", + mode="a", + compute=True, + ) + ``` + + Alternatively, skip `write_mdio_metadata()` and write both metadata and data in one call by invoking `to_mdio()` directly on the `mdio.Dataset` produced by `_construct_mdio_dataset`, if you have it available. \ No newline at end of file From df8de5760415a9085b97a0901c35798d9ea4306f Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 15:49:37 +0000 Subject: [PATCH 49/55] Remove accidental local change --- tests/test_main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index e0fb0131..1685c011 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -30,7 +30,6 @@ def test_main_succeeds(runner: CliRunner, segy_input: Path, zarr_tmp: Path) -> N def test_main_cloud(runner: CliRunner, segy_input_uri: str, zarr_tmp: Path) -> None: """It exits with a status code of zero.""" os.environ["MDIO__IMPORT__CLOUD_NATIVE"] = "true" - os.environ["MDIO__IMPORT__CPU_COUNT"] = "1" cli_args = ["segy", "import", segy_input_uri, str(zarr_tmp)] cli_args.extend(["--header-locations", "181,185"]) cli_args.extend(["--header-names", "inline,crossline"]) From b7f607c0455332f55f5eed2e43fa48a110f975b9 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 16:10:40 +0000 Subject: [PATCH 50/55] Add contract factory creation method --- src/mdio/core/v1/factory.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/mdio/core/v1/factory.py b/src/mdio/core/v1/factory.py index b4d6ae8c..3cc7ced7 100644 --- a/src/mdio/core/v1/factory.py +++ b/src/mdio/core/v1/factory.py @@ -17,6 +17,39 @@ if TYPE_CHECKING: from mdio.schemas.v1.dataset import Dataset +import json + +from pydantic 
import ValidationError + + +def from_contract(store: str, contract: str | dict) -> Dataset: + """Creates an MDIO Dataset from the contract and writes the metadata to the store. + + Args: + store: The store to write the metadata to. + contract: The contract to create the dataset from. + + Raises: + ValueError: If the contract cannot be validated successfully. + + Returns: + The created MDIO Dataset. + """ + from mdio.core.v1._serializer import _construct_mdio_dataset + from mdio.schemas.v1 import Dataset as V1Dataset + + if isinstance(contract, str): + contract = json.loads(contract) + + try: + V1Dataset.model_validate(contract) + except ValidationError as e: + msg = f"Failed to validate the input contract: {e}" + raise ValueError(msg) from e + + ds = _construct_mdio_dataset(contract) + return ds.to_mdio(store) + class MDIOSchemaType(Enum): """MDIO templates for specific data types.""" From de2b735f923a73928cc089b36f86e4ed8f773897 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 16:11:01 +0000 Subject: [PATCH 51/55] Linting --- docs/tutorials/builder.ipynb | 1660 ++++++++--------- docs/tutorials/builder.md | 2 +- src/mdio/core/v1/_serializer.py | 16 +- tests/unit/schema/v1/test_template_builder.py | 77 +- 4 files changed, 868 insertions(+), 887 deletions(-) diff --git a/docs/tutorials/builder.ipynb b/docs/tutorials/builder.ipynb index be2483fa..f6667f5e 100644 --- a/docs/tutorials/builder.ipynb +++ b/docs/tutorials/builder.ipynb @@ -1,847 +1,839 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "9fd6d920", - "metadata": {}, - "source": [ - "# Constructing a v1 Dataset with the MDIODatasetBuilder\n", - "\n", - "In this notebook, we demonstrate how to use the `MDIODatasetBuilder` to build and write a post-stack depth-migrated (PSDM) seismic dataset using the MDIO v1 schema." 
- ] - }, - { - "cell_type": "markdown", - "id": "1240095a", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c00c220", - "metadata": {}, - "outputs": [], - "source": [ - "from mdio.core.v1.builder import MDIODatasetBuilder, write_mdio_metadata\n", - "from mdio.schemas.dtype import ScalarType, StructuredType\n", - "from mdio.schemas.compressors import Blosc, ZFP\n", - "\n", - "# Auxiliary import for formatting and pretty printing\n", - "from rich import print as rprint\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "id": "a9432bdc", - "metadata": {}, - "source": [ - "## 1. Create Builder and Add Dimensions\n", - "First, instantiate a builder instance with a name and optional global attributes. The builder provides a chainable interface to construct bespoke Dataset contracts that may not exist in the factory.\n", - "\n", - "Attributes are free-form and intended to describe the overall dataset, data providence, processing steps, or any other information that would enrich the Dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "35505bee", - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize builder for PSDM stack\n", - "builder = MDIODatasetBuilder(\n", - " name=\"psdm_stack_example\",\n", - " attributes={ 'description': 'Example PSDM stack' }\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "1befa778", - "metadata": {}, - "source": [ - "# 2. Add Dimensions\n", - "\n", - "The Dimensions represent the core grid of the Dataset.\n", - "\n", - "They are one-dimensional tick-labels which may be populated with values for value-based and index-based access to the Dataset or inert for index-based access to the Dataset.\n", - "\n", - "It is generally recommended to fully populate the dimensions, but is beyond the scope of this example." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bd9df8ca", - "metadata": {}, - "outputs": [], - "source": [ - "# Add core dimensions: inline, crossline, depth\n", - "builder.add_dimension('inline', 256, long_name='Inline Number')\\\n", - " .add_dimension('crossline', 512, long_name='Crossline Number')\\\n", - " .add_dimension('depth', 384, long_name='Depth Sample')" - ] - }, - { - "cell_type": "markdown", - "id": "4ac0a62e", - "metadata": {}, - "source": [ - "# 3. Add CDP Coordinates (UTM Easting/Northing)\n", - "\n", - "Coordinates are N-dimensional arrays which enrich the dataset by providing auxiliary coordinate systems.\n", - "\n", - "In this example, our Dataset contract shows that we expect that our inline and crossline indices can be translated into real world coordinate values in Map Grid of Australia [Zone 51](https://epsg.io/28351)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d2da0c3", - "metadata": {}, - "outputs": [ - { - "ename": "ValidationError", - "evalue": "2 validation errors for Coordinate\nmetadata.0.AllUnits.attributes\n Extra inputs are not permitted [type=extra_forbidden, input_value={'MGA': 51}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden\nmetadata.0.UserAttributes.unitsV1\n Extra inputs are not permitted [type=extra_forbidden, input_value={'length': 'm'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mValidationError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# CDP X and Y on inline-crossline grid\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m 
\u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43madd_coordinate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mcdp_x\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[43m \u001b[49m\u001b[43mdimensions\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43minline\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mcrossline\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mCDP X (UTM Easting)\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mScalarType\u001b[49m\u001b[43m.\u001b[49m\u001b[43mFLOAT64\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43munitsV1\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mlength\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mm\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mattributes\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mMGA\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m51\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 
8\u001b[39m \u001b[43m)\u001b[49m.add_coordinate(\n\u001b[32m 9\u001b[39m name=\u001b[33m'\u001b[39m\u001b[33mcdp_y\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 10\u001b[39m dimensions=[\u001b[33m'\u001b[39m\u001b[33minline\u001b[39m\u001b[33m'\u001b[39m,\u001b[33m'\u001b[39m\u001b[33mcrossline\u001b[39m\u001b[33m'\u001b[39m],\n\u001b[32m 11\u001b[39m long_name=\u001b[33m'\u001b[39m\u001b[33mCDP Y (UTM Northing)\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 12\u001b[39m data_type=ScalarType.FLOAT64,\n\u001b[32m 13\u001b[39m metadata={\n\u001b[32m 14\u001b[39m \u001b[33m'\u001b[39m\u001b[33munitsV1\u001b[39m\u001b[33m'\u001b[39m: {\u001b[33m'\u001b[39m\u001b[33mlength\u001b[39m\u001b[33m'\u001b[39m: \u001b[33m'\u001b[39m\u001b[33mm\u001b[39m\u001b[33m'\u001b[39m},\n\u001b[32m 15\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mattributes\u001b[39m\u001b[33m\"\u001b[39m: {\u001b[33m\"\u001b[39m\u001b[33mMGA\u001b[39m\u001b[33m\"\u001b[39m: \u001b[32m51\u001b[39m}\n\u001b[32m 16\u001b[39m },\n\u001b[32m 17\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/src/mdio/core/v1/builder.py:139\u001b[39m, in \u001b[36mMDIODatasetBuilder.add_coordinate\u001b[39m\u001b[34m(self, name, long_name, dimensions, data_type, metadata)\u001b[39m\n\u001b[32m 135\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 136\u001b[39m dim_objects.append(dim)\n\u001b[32m 138\u001b[39m \u001b[38;5;28mself\u001b[39m._coordinates.append(\n\u001b[32m--> \u001b[39m\u001b[32m139\u001b[39m \u001b[43mmake_coordinate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 140\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 141\u001b[39m \u001b[43m \u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 142\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mdimensions\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdim_objects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 143\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 144\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 145\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 146\u001b[39m )\n\u001b[32m 147\u001b[39m \u001b[38;5;28mself\u001b[39m._state = _BuilderState.HAS_COORDINATES\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/src/mdio/core/v1/_serializer.py:57\u001b[39m, in \u001b[36mmake_coordinate\u001b[39m\u001b[34m(name, dimensions, data_type, long_name, metadata)\u001b[39m\n\u001b[32m 49\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Create a Coordinate with the given name, dimensions, data_type, and metadata.\"\"\"\u001b[39;00m\n\u001b[32m 50\u001b[39m coordinate_dict = {\n\u001b[32m 51\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mname\u001b[39m\u001b[33m\"\u001b[39m: name,\n\u001b[32m 52\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mlongName\u001b[39m\u001b[33m\"\u001b[39m: long_name,\n\u001b[32m (...)\u001b[39m\u001b[32m 55\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mmetadata\u001b[39m\u001b[33m\"\u001b[39m: metadata,\n\u001b[32m 56\u001b[39m }\n\u001b[32m---> \u001b[39m\u001b[32m57\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mCoordinate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mcoordinate_dict\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/.venv/lib/python3.12/site-packages/pydantic/main.py:212\u001b[39m, in \u001b[36mBaseModel.__init__\u001b[39m\u001b[34m(self, **data)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;66;03m# 
`__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[32m 211\u001b[39m __tracebackhide__ = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m212\u001b[39m validated_self = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mself_instance\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 213\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m validated_self:\n\u001b[32m 214\u001b[39m warnings.warn(\n\u001b[32m 215\u001b[39m \u001b[33m'\u001b[39m\u001b[33mA custom validator is returning a value other than `self`.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m'\u001b[39m\n\u001b[32m 216\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mReturning anything other than `self` from a top level model validator isn\u001b[39m\u001b[33m'\u001b[39m\u001b[33mt supported when validating via `__init__`.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 217\u001b[39m \u001b[33m'\u001b[39m\u001b[33mSee the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 218\u001b[39m category=\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 219\u001b[39m )\n", - "\u001b[31mValidationError\u001b[39m: 2 validation errors for Coordinate\nmetadata.0.AllUnits.attributes\n Extra inputs are not permitted [type=extra_forbidden, input_value={'MGA': 51}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden\nmetadata.0.UserAttributes.unitsV1\n Extra inputs are not permitted [type=extra_forbidden, 
input_value={'length': 'm'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden" - ] - } - ], - "source": [ - "# CDP X and Y on inline-crossline grid\n", - "builder.add_coordinate(\n", - " name='cdp_x',\n", - " dimensions=['inline','crossline'],\n", - " long_name='CDP X (UTM Easting)',\n", - " data_type=ScalarType.FLOAT64,\n", - " metadata={\n", - " 'unitsV1': {'length': 'm'}, \n", - " \"attributes\": {\"MGA\": 51}\n", - " },\n", - ").add_coordinate(\n", - " name='cdp_y',\n", - " dimensions=['inline','crossline'],\n", - " long_name='CDP Y (UTM Northing)',\n", - " data_type=ScalarType.FLOAT64,\n", - " metadata={\n", - " 'unitsV1': {'length': 'm'},\n", - " \"attributes\": {\"MGA\": 51}\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "45954756", - "metadata": {}, - "source": [ - "## 3. Add Post-Stack Amplitude Volume Variable" - ] - }, + "cells": [ + { + "cell_type": "markdown", + "id": "9fd6d920", + "metadata": {}, + "source": [ + "# Constructing a v1 Dataset with the MDIODatasetBuilder\n", + "\n", + "In this notebook, we demonstrate how to use the `MDIODatasetBuilder` to build and write a post-stack depth-migrated (PSDM) seismic dataset using the MDIO v1 schema." + ] + }, + { + "cell_type": "markdown", + "id": "1240095a", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c00c220", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "# Auxiliary import for formatting and pretty printing\n", + "from rich import print as rprint\n", + "\n", + "from mdio.core.v1.builder import MDIODatasetBuilder\n", + "from mdio.schemas.compressors import Blosc\n", + "from mdio.schemas.dtype import ScalarType" + ] + }, + { + "cell_type": "markdown", + "id": "a9432bdc", + "metadata": {}, + "source": [ + "## 1. 
Create Builder and Add Dimensions\n", + "First, instantiate a builder instance with a name and optional global attributes. The builder provides a chainable interface to construct bespoke Dataset contracts that may not exist in the factory.\n", + "\n", + "Attributes are free-form and intended to describe the overall dataset, data provenance, processing steps, or any other information that would enrich the Dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35505bee", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize builder for PSDM stack\n", + "builder = MDIODatasetBuilder(\n", + " name=\"psdm_stack_example\", attributes={\"description\": \"Example PSDM stack\"}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1befa778", + "metadata": {}, + "source": [ + "# 2. Add Dimensions\n", + "\n", + "The Dimensions represent the core grid of the Dataset.\n", + "\n", + "They are one-dimensional tick-labels which may be populated with values for value-based and index-based access to the Dataset or inert for index-based access to the Dataset.\n", + "\n", + "It is generally recommended to fully populate the dimensions, but doing so is beyond the scope of this example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bd9df8ca", + "metadata": {}, + "outputs": [], + "source": [ + "# Add core dimensions: inline, crossline, depth\n", + "builder.add_dimension(\"inline\", 256, long_name=\"Inline Number\").add_dimension(\n", + " \"crossline\", 512, long_name=\"Crossline Number\"\n", + ").add_dimension(\"depth\", 384, long_name=\"Depth Sample\")" + ] + }, + { + "cell_type": "markdown", + "id": "4ac0a62e", + "metadata": {}, + "source": [ + "# 3.
Add CDP Coordinates (UTM Easting/Northing)\n", + "\n", + "Coordinates are N-dimensional arrays which enrich the dataset by providing auxiliary coordinate systems.\n", + "\n", + "In this example, our Dataset contract shows that we expect that our inline and crossline indices can be translated into real world coordinate values in Map Grid of Australia [Zone 51](https://epsg.io/28351)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d2da0c3", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "7b4c8aa7", - "metadata": {}, - "outputs": [], - "source": [ - "builder.add_variable(\n", - " name='stack_amplitude',\n", - " dimensions=['inline','crossline','depth'],\n", - " data_type=ScalarType.FLOAT32,\n", - " compressor=Blosc(algorithm='zstd', level=3),\n", - " coordinates=['inline','crossline','cdp_x','cdp_y'],\n", - " metadata={\n", - " 'chunkGrid': {'name': 'regular', 'configuration': {'chunkShape': [64, 64, 64]}}\n", - " },\n", - ")" - ] - }, + "ename": "ValidationError", + "evalue": "2 validation errors for Coordinate\nmetadata.0.AllUnits.attributes\n Extra inputs are not permitted [type=extra_forbidden, input_value={'MGA': 51}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden\nmetadata.0.UserAttributes.unitsV1\n Extra inputs are not permitted [type=extra_forbidden, input_value={'length': 'm'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mValidationError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# CDP X and Y on inline-crossline grid\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m 
\u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43madd_coordinate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mcdp_x\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[43m \u001b[49m\u001b[43mdimensions\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43minline\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mcrossline\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mCDP X (UTM Easting)\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mScalarType\u001b[49m\u001b[43m.\u001b[49m\u001b[43mFLOAT64\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43munitsV1\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mlength\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mm\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mattributes\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mMGA\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m51\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 
8\u001b[39m \u001b[43m)\u001b[49m.add_coordinate(\n\u001b[32m 9\u001b[39m name=\u001b[33m'\u001b[39m\u001b[33mcdp_y\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 10\u001b[39m dimensions=[\u001b[33m'\u001b[39m\u001b[33minline\u001b[39m\u001b[33m'\u001b[39m,\u001b[33m'\u001b[39m\u001b[33mcrossline\u001b[39m\u001b[33m'\u001b[39m],\n\u001b[32m 11\u001b[39m long_name=\u001b[33m'\u001b[39m\u001b[33mCDP Y (UTM Northing)\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 12\u001b[39m data_type=ScalarType.FLOAT64,\n\u001b[32m 13\u001b[39m metadata={\n\u001b[32m 14\u001b[39m \u001b[33m'\u001b[39m\u001b[33munitsV1\u001b[39m\u001b[33m'\u001b[39m: {\u001b[33m'\u001b[39m\u001b[33mlength\u001b[39m\u001b[33m'\u001b[39m: \u001b[33m'\u001b[39m\u001b[33mm\u001b[39m\u001b[33m'\u001b[39m},\n\u001b[32m 15\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mattributes\u001b[39m\u001b[33m\"\u001b[39m: {\u001b[33m\"\u001b[39m\u001b[33mMGA\u001b[39m\u001b[33m\"\u001b[39m: \u001b[32m51\u001b[39m}\n\u001b[32m 16\u001b[39m },\n\u001b[32m 17\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/src/mdio/core/v1/builder.py:139\u001b[39m, in \u001b[36mMDIODatasetBuilder.add_coordinate\u001b[39m\u001b[34m(self, name, long_name, dimensions, data_type, metadata)\u001b[39m\n\u001b[32m 135\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 136\u001b[39m dim_objects.append(dim)\n\u001b[32m 138\u001b[39m \u001b[38;5;28mself\u001b[39m._coordinates.append(\n\u001b[32m--> \u001b[39m\u001b[32m139\u001b[39m \u001b[43mmake_coordinate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 140\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 141\u001b[39m \u001b[43m \u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 142\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mdimensions\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdim_objects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 143\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 144\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 145\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 146\u001b[39m )\n\u001b[32m 147\u001b[39m \u001b[38;5;28mself\u001b[39m._state = _BuilderState.HAS_COORDINATES\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/src/mdio/core/v1/_serializer.py:57\u001b[39m, in \u001b[36mmake_coordinate\u001b[39m\u001b[34m(name, dimensions, data_type, long_name, metadata)\u001b[39m\n\u001b[32m 49\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Create a Coordinate with the given name, dimensions, data_type, and metadata.\"\"\"\u001b[39;00m\n\u001b[32m 50\u001b[39m coordinate_dict = {\n\u001b[32m 51\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mname\u001b[39m\u001b[33m\"\u001b[39m: name,\n\u001b[32m 52\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mlongName\u001b[39m\u001b[33m\"\u001b[39m: long_name,\n\u001b[32m (...)\u001b[39m\u001b[32m 55\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mmetadata\u001b[39m\u001b[33m\"\u001b[39m: metadata,\n\u001b[32m 56\u001b[39m }\n\u001b[32m---> \u001b[39m\u001b[32m57\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mCoordinate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mcoordinate_dict\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/.venv/lib/python3.12/site-packages/pydantic/main.py:212\u001b[39m, in \u001b[36mBaseModel.__init__\u001b[39m\u001b[34m(self, **data)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;66;03m# 
`__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[32m 211\u001b[39m __tracebackhide__ = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m212\u001b[39m validated_self = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mself_instance\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 213\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m validated_self:\n\u001b[32m 214\u001b[39m warnings.warn(\n\u001b[32m 215\u001b[39m \u001b[33m'\u001b[39m\u001b[33mA custom validator is returning a value other than `self`.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m'\u001b[39m\n\u001b[32m 216\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mReturning anything other than `self` from a top level model validator isn\u001b[39m\u001b[33m'\u001b[39m\u001b[33mt supported when validating via `__init__`.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 217\u001b[39m \u001b[33m'\u001b[39m\u001b[33mSee the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 218\u001b[39m category=\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 219\u001b[39m )\n", + "\u001b[31mValidationError\u001b[39m: 2 validation errors for Coordinate\nmetadata.0.AllUnits.attributes\n Extra inputs are not permitted [type=extra_forbidden, input_value={'MGA': 51}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden\nmetadata.0.UserAttributes.unitsV1\n Extra inputs are not permitted [type=extra_forbidden, 
input_value={'length': 'm'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden" + ] + } + ], + "source": [ + "# CDP X and Y on inline-crossline grid\n", + "builder.add_coordinate(\n", + " name=\"cdp_x\",\n", + " dimensions=[\"inline\", \"crossline\"],\n", + " long_name=\"CDP X (UTM Easting)\",\n", + " data_type=ScalarType.FLOAT64,\n", + " metadata={\"unitsV1\": {\"length\": \"m\"}, \"attributes\": {\"MGA\": 51}},\n", + ").add_coordinate(\n", + " name=\"cdp_y\",\n", + " dimensions=[\"inline\", \"crossline\"],\n", + " long_name=\"CDP Y (UTM Northing)\",\n", + " data_type=ScalarType.FLOAT64,\n", + " metadata={\"unitsV1\": {\"length\": \"m\"}, \"attributes\": {\"MGA\": 51}},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "45954756", + "metadata": {}, + "source": [ + "## 3. Add Post-Stack Amplitude Volume Variable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b4c8aa7", + "metadata": {}, + "outputs": [], + "source": [ + "builder.add_variable(\n", + " name=\"stack_amplitude\",\n", + " dimensions=[\"inline\", \"crossline\", \"depth\"],\n", + " data_type=ScalarType.FLOAT32,\n", + " compressor=Blosc(algorithm=\"zstd\", level=3),\n", + " coordinates=[\"inline\", \"crossline\", \"cdp_x\", \"cdp_y\"],\n", + " metadata={\"chunkGrid\": {\"name\": \"regular\", \"configuration\": {\"chunkShape\": [64, 64, 64]}}},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0ed7500f", + "metadata": {}, + "source": [ + "## 4. Build and Write" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d7df200f", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "id": "0ed7500f", - "metadata": {}, - "source": [ - "## 4. 
Build and Write" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/workspaces/mdio-python/src/mdio/core/v1/_overloads.py:32: FutureWarning: zarr_version is deprecated, use zarr_format\n", + " return super().to_zarr(*args, store=store, **kwargs)\n" + ] }, { - "cell_type": "code", - "execution_count": 5, - "id": "d7df200f", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspaces/mdio-python/src/mdio/core/v1/_overloads.py:32: FutureWarning: zarr_version is deprecated, use zarr_format\n", - " return super().to_zarr(*args, store=store, **kwargs)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.MDIODataset> Size: 203MB\n",
-              "Dimensions:          (inline: 256, crossline: 512, depth: 384)\n",
-              "Coordinates:\n",
-              "  * inline           (inline) int32 1kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n",
-              "  * crossline        (crossline) int32 2kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0\n",
-              "  * depth            (depth) int32 2kB 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n",
-              "Data variables:\n",
-              "    stack_amplitude  (inline, crossline, depth) float32 201MB 0.0 0.0 ... 0.0\n",
-              "    cdp_x            (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n",
-              "    cdp_y            (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n",
-              "Attributes:\n",
-              "    apiVersion:  1.0.0\n",
-              "    createdOn:   2025-05-29 14:18:21.113904+00:00\n",
-              "    name:        psdm_stack_example\n",
-              "    attributes:  {'description': 'Example PSDM stack'}
" - ], - "text/plain": [ - " Size: 203MB\n", - "Dimensions: (inline: 256, crossline: 512, depth: 384)\n", - "Coordinates:\n", - " * inline (inline) int32 1kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n", - " * crossline (crossline) int32 2kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0\n", - " * depth (depth) int32 2kB 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n", - "Data variables:\n", - " stack_amplitude (inline, crossline, depth) float32 201MB 0.0 0.0 ... 0.0\n", - " cdp_x (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n", - " cdp_y (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n", - "Attributes:\n", - " apiVersion: 1.0.0\n", - " createdOn: 2025-05-29 14:18:21.113904+00:00\n", - " name: psdm_stack_example\n", - " attributes: {'description': 'Example PSDM stack'}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.MDIODataset> Size: 203MB\n",
+       "Dimensions:          (inline: 256, crossline: 512, depth: 384)\n",
+       "Coordinates:\n",
+       "  * inline           (inline) int32 1kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n",
+       "  * crossline        (crossline) int32 2kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0\n",
+       "  * depth            (depth) int32 2kB 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n",
+       "Data variables:\n",
+       "    stack_amplitude  (inline, crossline, depth) float32 201MB 0.0 0.0 ... 0.0\n",
+       "    cdp_x            (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n",
+       "    cdp_y            (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n",
+       "Attributes:\n",
+       "    apiVersion:  1.0.0\n",
+       "    createdOn:   2025-05-29 14:18:21.113904+00:00\n",
+       "    name:        psdm_stack_example\n",
+       "    attributes:  {'description': 'Example PSDM stack'}
" ], - "source": [ - "# Write only metadata to .mdio store and build the interactable Dataset object\n", - "ds = builder.to_mdio(store='output/psdm_stack_example.mdio')\n", - "\n", - "# Display the interactable Dataset\n", - "ds" - ] - }, - { - "cell_type": "markdown", - "id": "9efbeb0b", - "metadata": {}, - "source": [ - "# Build and view the Dataset contract" + "text/plain": [ + " Size: 203MB\n", + "Dimensions: (inline: 256, crossline: 512, depth: 384)\n", + "Coordinates:\n", + " * inline (inline) int32 1kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n", + " * crossline (crossline) int32 2kB 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0\n", + " * depth (depth) int32 2kB 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0\n", + "Data variables:\n", + " stack_amplitude (inline, crossline, depth) float32 201MB 0.0 0.0 ... 0.0\n", + " cdp_x (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n", + " cdp_y (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n", + "Attributes:\n", + " apiVersion: 1.0.0\n", + " createdOn: 2025-05-29 14:18:21.113904+00:00\n", + " name: psdm_stack_example\n", + " attributes: {'description': 'Example PSDM stack'}" ] - }, + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Write only metadata to .mdio store and build the interactable Dataset object\n", + "ds = builder.to_mdio(store=\"output/psdm_stack_example.mdio\")\n", + "\n", + "# Display the interactable Dataset\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "9efbeb0b", + "metadata": {}, + "source": [ + "# Build and view the Dataset contract" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "bbcca480", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 7, - "id": "bbcca480", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
{\n",
-              "    'metadata': {\n",
-              "        'name': 'psdm_stack_example',\n",
-              "        'apiVersion': '1.0.0',\n",
-              "        'createdOn': '2025-05-29T14:18:21.113904Z',\n",
-              "        'attributes': {'description': 'Example PSDM stack'}\n",
-              "    },\n",
-              "    'variables': [\n",
-              "        {\n",
-              "            'dataType': 'int32',\n",
-              "            'dimensions': [{'name': 'inline', 'size': 256}],\n",
-              "            'name': 'inline',\n",
-              "            'longName': 'Inline Number'\n",
-              "        },\n",
-              "        {\n",
-              "            'dataType': 'int32',\n",
-              "            'dimensions': [{'name': 'crossline', 'size': 512}],\n",
-              "            'name': 'crossline',\n",
-              "            'longName': 'Crossline Number'\n",
-              "        },\n",
-              "        {\n",
-              "            'dataType': 'int32',\n",
-              "            'dimensions': [{'name': 'depth', 'size': 384}],\n",
-              "            'name': 'depth',\n",
-              "            'longName': 'Depth Sample'\n",
-              "        },\n",
-              "        {\n",
-              "            'dataType': 'float32',\n",
-              "            'dimensions': [\n",
-              "                {'name': 'inline', 'size': 256},\n",
-              "                {'name': 'crossline', 'size': 512},\n",
-              "                {'name': 'depth', 'size': 384}\n",
-              "            ],\n",
-              "            'compressor': {'name': 'blosc', 'algorithm': 'zstd', 'level': 3, 'shuffle': 1, 'blocksize': 0},\n",
-              "            'name': 'stack_amplitude',\n",
-              "            'coordinates': ['inline', 'crossline', 'cdp_x', 'cdp_y'],\n",
-              "            'metadata': {'chunkGrid': {'name': 'regular', 'configuration': {'chunkShape': [64, 64, 64]}}}\n",
-              "        },\n",
-              "        {\n",
-              "            'dataType': 'float64',\n",
-              "            'dimensions': [{'name': 'inline', 'size': 256}, {'name': 'crossline', 'size': 512}],\n",
-              "            'name': 'cdp_x',\n",
-              "            'longName': 'CDP X (UTM Easting)',\n",
-              "            'metadata': {'unitsV1': {'length': 'm'}}\n",
-              "        },\n",
-              "        {\n",
-              "            'dataType': 'float64',\n",
-              "            'dimensions': [{'name': 'inline', 'size': 256}, {'name': 'crossline', 'size': 512}],\n",
-              "            'name': 'cdp_y',\n",
-              "            'longName': 'CDP Y (UTM Northing)',\n",
-              "            'metadata': {'unitsV1': {'length': 'm'}}\n",
-              "        }\n",
-              "    ]\n",
-              "}\n",
-              "
\n" - ], - "text/plain": [ - "\u001b[1m{\u001b[0m\n", - " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\n", - " \u001b[32m'name'\u001b[0m: \u001b[32m'psdm_stack_example'\u001b[0m,\n", - " \u001b[32m'apiVersion'\u001b[0m: \u001b[32m'1.0.0'\u001b[0m,\n", - " \u001b[32m'createdOn'\u001b[0m: \u001b[32m'2025-05-29T14:18:21.113904Z'\u001b[0m,\n", - " \u001b[32m'attributes'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Example PSDM stack'\u001b[0m\u001b[1m}\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[32m'variables'\u001b[0m: \u001b[1m[\u001b[0m\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dataType'\u001b[0m: \u001b[32m'int32'\u001b[0m,\n", - " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", - " \u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m,\n", - " \u001b[32m'longName'\u001b[0m: \u001b[32m'Inline Number'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dataType'\u001b[0m: \u001b[32m'int32'\u001b[0m,\n", - " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", - " \u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m,\n", - " \u001b[32m'longName'\u001b[0m: \u001b[32m'Crossline Number'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dataType'\u001b[0m: \u001b[32m'int32'\u001b[0m,\n", - " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'depth'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m384\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", - " \u001b[32m'name'\u001b[0m: \u001b[32m'depth'\u001b[0m,\n", - " \u001b[32m'longName'\u001b[0m: 
\u001b[32m'Depth Sample'\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float32'\u001b[0m,\n", - " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\n", - " \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'depth'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m384\u001b[0m\u001b[1m}\u001b[0m\n", - " \u001b[1m]\u001b[0m,\n", - " \u001b[32m'compressor'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'blosc'\u001b[0m, \u001b[32m'algorithm'\u001b[0m: \u001b[32m'zstd'\u001b[0m, \u001b[32m'level'\u001b[0m: \u001b[1;36m3\u001b[0m, \u001b[32m'shuffle'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'blocksize'\u001b[0m: \u001b[1;36m0\u001b[0m\u001b[1m}\u001b[0m,\n", - " \u001b[32m'name'\u001b[0m: \u001b[32m'stack_amplitude'\u001b[0m,\n", - " \u001b[32m'coordinates'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'inline'\u001b[0m, \u001b[32m'crossline'\u001b[0m, \u001b[32m'cdp_x'\u001b[0m, \u001b[32m'cdp_y'\u001b[0m\u001b[1m]\u001b[0m,\n", - " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'chunkGrid'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'regular'\u001b[0m, \u001b[32m'configuration'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'chunkShape'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1;36m64\u001b[0m, \u001b[1;36m64\u001b[0m, \u001b[1;36m64\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float64'\u001b[0m,\n", - " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: 
\u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", - " \u001b[32m'name'\u001b[0m: \u001b[32m'cdp_x'\u001b[0m,\n", - " \u001b[32m'longName'\u001b[0m: \u001b[32m'CDP X \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUTM Easting\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m,\n", - " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: \u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", - " \u001b[1m}\u001b[0m,\n", - " \u001b[1m{\u001b[0m\n", - " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float64'\u001b[0m,\n", - " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", - " \u001b[32m'name'\u001b[0m: \u001b[32m'cdp_y'\u001b[0m,\n", - " \u001b[32m'longName'\u001b[0m: \u001b[32m'CDP Y \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUTM Northing\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m,\n", - " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: \u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", - " \u001b[1m}\u001b[0m\n", - " \u001b[1m]\u001b[0m\n", - "\u001b[1m}\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } + "data": { + "text/html": [ + "
{\n",
+       "    'metadata': {\n",
+       "        'name': 'psdm_stack_example',\n",
+       "        'apiVersion': '1.0.0',\n",
+       "        'createdOn': '2025-05-29T14:18:21.113904Z',\n",
+       "        'attributes': {'description': 'Example PSDM stack'}\n",
+       "    },\n",
+       "    'variables': [\n",
+       "        {\n",
+       "            'dataType': 'int32',\n",
+       "            'dimensions': [{'name': 'inline', 'size': 256}],\n",
+       "            'name': 'inline',\n",
+       "            'longName': 'Inline Number'\n",
+       "        },\n",
+       "        {\n",
+       "            'dataType': 'int32',\n",
+       "            'dimensions': [{'name': 'crossline', 'size': 512}],\n",
+       "            'name': 'crossline',\n",
+       "            'longName': 'Crossline Number'\n",
+       "        },\n",
+       "        {\n",
+       "            'dataType': 'int32',\n",
+       "            'dimensions': [{'name': 'depth', 'size': 384}],\n",
+       "            'name': 'depth',\n",
+       "            'longName': 'Depth Sample'\n",
+       "        },\n",
+       "        {\n",
+       "            'dataType': 'float32',\n",
+       "            'dimensions': [\n",
+       "                {'name': 'inline', 'size': 256},\n",
+       "                {'name': 'crossline', 'size': 512},\n",
+       "                {'name': 'depth', 'size': 384}\n",
+       "            ],\n",
+       "            'compressor': {'name': 'blosc', 'algorithm': 'zstd', 'level': 3, 'shuffle': 1, 'blocksize': 0},\n",
+       "            'name': 'stack_amplitude',\n",
+       "            'coordinates': ['inline', 'crossline', 'cdp_x', 'cdp_y'],\n",
+       "            'metadata': {'chunkGrid': {'name': 'regular', 'configuration': {'chunkShape': [64, 64, 64]}}}\n",
+       "        },\n",
+       "        {\n",
+       "            'dataType': 'float64',\n",
+       "            'dimensions': [{'name': 'inline', 'size': 256}, {'name': 'crossline', 'size': 512}],\n",
+       "            'name': 'cdp_x',\n",
+       "            'longName': 'CDP X (UTM Easting)',\n",
+       "            'metadata': {'unitsV1': {'length': 'm'}}\n",
+       "        },\n",
+       "        {\n",
+       "            'dataType': 'float64',\n",
+       "            'dimensions': [{'name': 'inline', 'size': 256}, {'name': 'crossline', 'size': 512}],\n",
+       "            'name': 'cdp_y',\n",
+       "            'longName': 'CDP Y (UTM Northing)',\n",
+       "            'metadata': {'unitsV1': {'length': 'm'}}\n",
+       "        }\n",
+       "    ]\n",
+       "}\n",
+       "
\n" ], - "source": [ - "# Build our Dataset model from the builder\n", - "dataset = builder.build()\n", - "\n", - "# Serialize the Dataset model to JSON\n", - "contract = json.loads(dataset.json())\n", - "\n", - "# Reorder the contract so that metadata is displayed first\n", - "ordered_contract = {\n", - " \"metadata\": contract[\"metadata\"],\n", - " \"variables\": contract[\"variables\"],\n", - "}\n", - "\n", - "rprint(ordered_contract)" + "text/plain": [ + "\u001b[1m{\u001b[0m\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'psdm_stack_example'\u001b[0m,\n", + " \u001b[32m'apiVersion'\u001b[0m: \u001b[32m'1.0.0'\u001b[0m,\n", + " \u001b[32m'createdOn'\u001b[0m: \u001b[32m'2025-05-29T14:18:21.113904Z'\u001b[0m,\n", + " \u001b[32m'attributes'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Example PSDM stack'\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[32m'variables'\u001b[0m: \u001b[1m[\u001b[0m\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'int32'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: \u001b[32m'Inline Number'\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'int32'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: \u001b[32m'Crossline Number'\u001b[0m\n", + " 
\u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'int32'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'depth'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m384\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'depth'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: \u001b[32m'Depth Sample'\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float32'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\n", + " \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'depth'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m384\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m]\u001b[0m,\n", + " \u001b[32m'compressor'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'blosc'\u001b[0m, \u001b[32m'algorithm'\u001b[0m: \u001b[32m'zstd'\u001b[0m, \u001b[32m'level'\u001b[0m: \u001b[1;36m3\u001b[0m, \u001b[32m'shuffle'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'blocksize'\u001b[0m: \u001b[1;36m0\u001b[0m\u001b[1m}\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'stack_amplitude'\u001b[0m,\n", + " \u001b[32m'coordinates'\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m'inline'\u001b[0m, \u001b[32m'crossline'\u001b[0m, \u001b[32m'cdp_x'\u001b[0m, \u001b[32m'cdp_y'\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'chunkGrid'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'regular'\u001b[0m, \u001b[32m'configuration'\u001b[0m: 
\u001b[1m{\u001b[0m\u001b[32m'chunkShape'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1;36m64\u001b[0m, \u001b[1;36m64\u001b[0m, \u001b[1;36m64\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float64'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'cdp_x'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: \u001b[32m'CDP X \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUTM Easting\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m,\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: \u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[1m{\u001b[0m\n", + " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float64'\u001b[0m,\n", + " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", + " \u001b[32m'name'\u001b[0m: \u001b[32m'cdp_y'\u001b[0m,\n", + " \u001b[32m'longName'\u001b[0m: \u001b[32m'CDP Y \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUTM Northing\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m,\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: 
\u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[1m}\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + "\u001b[1m}\u001b[0m\n" ] + }, + "metadata": {}, + "output_type": "display_data" } - ], - "metadata": { - "kernelspec": { - "display_name": "Python (mdio-dev)", - "language": "python", - "name": "mdio-dev" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } + ], + "source": [ + "# Build our Dataset model from the builder\n", + "dataset = builder.build()\n", + "\n", + "# Serialize the Dataset model to JSON\n", + "contract = json.loads(dataset.json())\n", + "\n", + "# Reorder the contract so that metadata is displayed first\n", + "ordered_contract = {\n", + " \"metadata\": contract[\"metadata\"],\n", + " \"variables\": contract[\"variables\"],\n", + "}\n", + "\n", + "rprint(ordered_contract)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (mdio-dev)", + "language": "python", + "name": "mdio-dev" }, - "nbformat": 4, - "nbformat_minor": 5 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/tutorials/builder.md b/docs/tutorials/builder.md index f41f68a4..9fddf377 100644 --- a/docs/tutorials/builder.md +++ b/docs/tutorials/builder.md @@ -143,4 +143,4 @@ The `.build()` method returns an in-memory Pydantic `Dataset` model (MDIO v1 sch ) ``` - Alternatively, skip `write_mdio_metadata()` and write both metadata and data in one call by invoking `to_mdio()` directly on the `mdio.Dataset` produced by `_construct_mdio_dataset`, if you have it available. 
\ No newline at end of file + Alternatively, skip `write_mdio_metadata()` and write both metadata and data in one call by invoking `to_mdio()` directly on the `mdio.Dataset` produced by `_construct_mdio_dataset`, if you have it available. diff --git a/src/mdio/core/v1/_serializer.py b/src/mdio/core/v1/_serializer.py index f5956bab..d7c6656d 100644 --- a/src/mdio/core/v1/_serializer.py +++ b/src/mdio/core/v1/_serializer.py @@ -54,23 +54,25 @@ def make_coordinate( # single dict input if isinstance(metadata, dict): if "unitsV1" in metadata: - items.append(AllUnits(**{"unitsV1": metadata["unitsV1"]})) + items.append(AllUnits(unitsV1=metadata["unitsV1"])) if "attributes" in metadata: - items.append(UserAttributes(**{"attributes": metadata["attributes"]})) + items.append(UserAttributes(attributes=metadata["attributes"])) # list input may contain dict or model instances elif isinstance(metadata, list): for md in metadata: - if isinstance(md, AllUnits) or isinstance(md, UserAttributes): + if isinstance(md, AllUnits) or isinstance(md, UserAttributes): # noqa: SIM101 items.append(md) elif isinstance(md, dict): if "unitsV1" in md: - items.append(AllUnits(**{"unitsV1": md["unitsV1"]})) + items.append(AllUnits(unitsV1=md["unitsV1"])) if "attributes" in md: - items.append(UserAttributes(**{"attributes": md["attributes"]})) + items.append(UserAttributes(attributes=md["attributes"])) else: - raise TypeError(f"Unsupported metadata element type for coordinate: {type(md)}") + msg = f"Unsupported metadata element type for coordinate: {type(md)}" + raise TypeError(msg) else: - raise TypeError(f"Unsupported metadata type for coordinate: {type(metadata)}") + msg = f"Unsupported metadata type for coordinate: {type(metadata)}" + raise TypeError(msg) coord_meta_list = items or None return Coordinate( name=name, diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index 0211302e..fb5bfd3e 100644 --- 
a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -89,7 +89,7 @@ def test_dimension_with_attributes() -> None: depth_var = builder._variables[0] assert depth_var.name == "depth" assert depth_var.data_type == ScalarType.FLOAT32 - assert depth_var.metadata.attributes["MGA"] == 51 + assert depth_var.metadata.attributes["MGA"] == 51 # noqa: PLR2004 def test_dimension_with_chunk_grid() -> None: @@ -137,8 +137,8 @@ def test_dimension_with_stats() -> None: depth_var = builder._variables[0] assert depth_var.name == "depth" assert depth_var.data_type == ScalarType.FLOAT32 - assert depth_var.metadata.stats_v1.count == 100 - assert depth_var.metadata.stats_v1.sum == 1215.1 + assert depth_var.metadata.stats_v1.count == 100 # noqa: PLR2004 + assert depth_var.metadata.stats_v1.sum == 1215.1 # noqa: PLR2004 def test_dimension_with_full_metadata() -> None: @@ -170,16 +170,16 @@ def test_dimension_with_full_metadata() -> None: assert depth_var.name == "depth" assert depth_var.data_type == ScalarType.FLOAT32 assert depth_var.metadata.units_v1.length == "m" - assert depth_var.metadata.attributes["MGA"] == 51 + assert depth_var.metadata.attributes["MGA"] == 51 # noqa: PLR2004 assert depth_var.metadata.chunk_grid.name == "regular" - assert depth_var.metadata.chunk_grid.configuration.chunk_shape == [20] - assert depth_var.metadata.stats_v1.count == 100 - assert depth_var.metadata.stats_v1.sum == 1215.1 - assert depth_var.metadata.stats_v1.sum_squares == 125.12 - assert depth_var.metadata.stats_v1.min == 5.61 - assert depth_var.metadata.stats_v1.max == 10.84 - assert depth_var.metadata.stats_v1.histogram.bin_centers == [1, 2] - assert depth_var.metadata.stats_v1.histogram.counts == [10, 15] + assert depth_var.metadata.chunk_grid.configuration.chunk_shape == [20] # noqa: PLR2004 + assert depth_var.metadata.stats_v1.count == 100 # noqa: PLR2004 + assert depth_var.metadata.stats_v1.sum == 1215.1 # noqa: PLR2004 + assert 
depth_var.metadata.stats_v1.sum_squares == 125.12 # noqa: PLR2004 + assert depth_var.metadata.stats_v1.min == 5.61 # noqa: PLR2004 + assert depth_var.metadata.stats_v1.max == 10.84 # noqa: PLR2004 + assert depth_var.metadata.stats_v1.histogram.bin_centers == [1, 2] # noqa: PLR2004 + assert depth_var.metadata.stats_v1.histogram.counts == [10, 15] # noqa: PLR2004 j = builder.build().json() print(j) @@ -192,15 +192,18 @@ def test_coordiante_with_units() -> None: builder.add_dimension("crossline", 100) # Add coordinate with units - builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}}) + builder.add_coordinate( + "cdp", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}} + ) - assert len(builder._variables) == 2 - assert len(builder._coordinates) == 1 + assert len(builder._variables) == 2 # noqa: PLR2004 + assert len(builder._coordinates) == 1 # noqa: PLR2004 cdp_var = builder._coordinates[0] assert cdp_var.name == "cdp" assert cdp_var.data_type == ScalarType.FLOAT32 assert cdp_var.metadata.units_v1.length == "m" + def test_coordinate_with_attributes() -> None: """Test adding coordinates with attributes.""" builder = MDIODatasetBuilder("test_dataset") @@ -208,31 +211,16 @@ def test_coordinate_with_attributes() -> None: builder.add_dimension("crossline", 100) # Add coordinate with attributes - builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"attributes": {"MGA": 51}}) - - assert len(builder._variables) == 2 - assert len(builder._coordinates) == 1 - cdp_var = builder._coordinates[0] - assert cdp_var.name == "cdp" - assert cdp_var.data_type == ScalarType.FLOAT32 - assert cdp_var.metadata.attributes["MGA"] == 51 - -def test_coordinate_with_chunk_grid() -> None: - """Test adding coordinates with chunk grid.""" - builder = MDIODatasetBuilder("test_dataset") - builder.add_dimension("inline", 100) - builder.add_dimension("crossline", 100) - - # Add coordinate with chunk grid - 
builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"chunkGrid": {"name": "regular", "configuration": {"chunkShape": [20, 20]}}}) + builder.add_coordinate( + "cdp", dimensions=["inline", "crossline"], metadata={"attributes": {"MGA": 51}} + ) - assert len(builder._variables) == 2 - assert len(builder._coordinates) == 1 + assert len(builder._variables) == 2 # noqa: PLR2004 + assert len(builder._coordinates) == 1 # noqa: PLR2004 cdp_var = builder._coordinates[0] assert cdp_var.name == "cdp" assert cdp_var.data_type == ScalarType.FLOAT32 - assert cdp_var.metadata.chunk_grid.name == "regular" - assert cdp_var.metadata.chunk_grid.configuration.chunk_shape == [20, 20] + assert cdp_var.metadata.attributes["MGA"] == 51 # noqa: PLR2004 def test_coordinate_with_full_metadata() -> None: @@ -242,20 +230,19 @@ def test_coordinate_with_full_metadata() -> None: builder.add_dimension("crossline", 100) # Add coordinate with all metadata - builder.add_coordinate("cdp", dimensions=["inline", "crossline"], metadata={"unitsV1": {"length": "m"}, "attributes": {"MGA": 51}, "chunkGrid": {"name": "regular", "configuration": {"chunkShape": [20]}}}) + builder.add_coordinate( + "cdp", + dimensions=["inline", "crossline"], + metadata={"unitsV1": {"length": "m"}, "attributes": {"MGA": 51}}, + ) - assert len(builder._variables) == 2 - assert len(builder._coordinates) == 1 + assert len(builder._variables) == 2 # noqa: PLR2004 + assert len(builder._coordinates) == 1 # noqa: PLR2004 cdp_var = builder._coordinates[0] assert cdp_var.name == "cdp" assert cdp_var.data_type == ScalarType.FLOAT32 assert cdp_var.metadata.units_v1.length == "m" - assert cdp_var.metadata.attributes["MGA"] == 51 - assert cdp_var.metadata.chunk_grid.name == "regular" - assert cdp_var.metadata.chunk_grid.configuration.chunk_shape == [20] - - j = builder.build().json() - print(j) + assert cdp_var.metadata.attributes["MGA"] == 51 # noqa: PLR2004 def test_coordinate_builder_state() -> None: From 
85a2adbc29128bfbc63e5209538db194b98daeeb Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 20:37:23 +0000 Subject: [PATCH 52/55] Fix tests until proposed PR is merged --- tests/unit/schema/v1/test_template_builder.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index fb5bfd3e..1ef8821d 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -201,7 +201,8 @@ def test_coordiante_with_units() -> None: cdp_var = builder._coordinates[0] assert cdp_var.name == "cdp" assert cdp_var.data_type == ScalarType.FLOAT32 - assert cdp_var.metadata.units_v1.length == "m" + # assert cdp_var.metadata.units_v1.length == "m" + assert cdp_var.metadata[0].units_v1.length == "m" def test_coordinate_with_attributes() -> None: @@ -220,8 +221,8 @@ def test_coordinate_with_attributes() -> None: cdp_var = builder._coordinates[0] assert cdp_var.name == "cdp" assert cdp_var.data_type == ScalarType.FLOAT32 - assert cdp_var.metadata.attributes["MGA"] == 51 # noqa: PLR2004 - + # assert cdp_var.metadata.attributes["MGA"] == 51 # noqa: PLR2004 + assert cdp_var.metadata[0].attributes["MGA"] == 51 # noqa: PLR2004 def test_coordinate_with_full_metadata() -> None: """Test adding coordinates with all metadata.""" @@ -241,8 +242,13 @@ def test_coordinate_with_full_metadata() -> None: cdp_var = builder._coordinates[0] assert cdp_var.name == "cdp" assert cdp_var.data_type == ScalarType.FLOAT32 - assert cdp_var.metadata.units_v1.length == "m" - assert cdp_var.metadata.attributes["MGA"] == 51 # noqa: PLR2004 + + # TODO(BrianMichell): #553 - If this PR is merged, we can remove the subscripting + + # assert cdp_var.metadata.units_v1.length == "m" + assert cdp_var.metadata[0].units_v1.length == "m" + # assert cdp_var.metadata.attributes["MGA"] == 51 # noqa: PLR2004 + assert 
cdp_var.metadata[1].attributes["MGA"] == 51 # noqa: PLR2004 def test_coordinate_builder_state() -> None: From 87e4a826e3cb5e0ae1720fa6271fe5c2bf38f245 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 20:45:52 +0000 Subject: [PATCH 53/55] Fix deprecated zarr_version kwarg --- src/mdio/core/v1/_overloads.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/mdio/core/v1/_overloads.py b/src/mdio/core/v1/_overloads.py index 55fb0a2b..a07353b1 100644 --- a/src/mdio/core/v1/_overloads.py +++ b/src/mdio/core/v1/_overloads.py @@ -23,12 +23,12 @@ def to_mdio( **kwargs: Mapping[str, str | int | float | bool], ) -> None: """Alias for `.to_zarr()`.""" - # Ensure zarr_version=2 by default unless explicitly overridden - zarr_version = kwargs.get("zarr_version", 2) - if zarr_version != 2: # noqa: PLR2004 - msg = "MDIO only supports zarr_version=2" + # Ensure zarr_format=2 by default unless explicitly overridden + zarr_format = kwargs.get("zarr_format", 2) + if zarr_format != 2: # noqa: PLR2004 + msg = "MDIO only supports zarr_format=2" raise ValueError(msg) - kwargs["zarr_version"] = zarr_version + kwargs["zarr_format"] = zarr_format return super().to_zarr(*args, store=store, **kwargs) @@ -44,12 +44,12 @@ def to_mdio( **kwargs: Mapping[str, str | int | float | bool], ) -> None: """Alias for `.to_zarr()`, and writes to Zarr store.""" - # Ensure zarr_version=2 by default unless explicitly overridden - zarr_version = kwargs.get("zarr_version", 2) - if zarr_version != 2: # noqa: PLR2004 - msg = "MDIO only supports zarr_version=2" + # Ensure zarr_format=2 by default unless explicitly overridden + zarr_format = kwargs.get("zarr_format", 2) + if zarr_format != 2: # noqa: PLR2004 + msg = "MDIO only supports zarr_format=2" raise ValueError(msg) - kwargs["zarr_version"] = zarr_version + kwargs["zarr_format"] = zarr_format return super().to_zarr(*args, store=store, **kwargs) From b39f8c6cd27253fd69c8e6b31c53a1b53c8fa448 Mon Sep 17 
00:00:00 2001 From: BrianMichell Date: Fri, 30 May 2025 20:46:26 +0000 Subject: [PATCH 54/55] Linting --- tests/unit/schema/v1/test_template_builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/schema/v1/test_template_builder.py b/tests/unit/schema/v1/test_template_builder.py index 1ef8821d..83ac3b11 100644 --- a/tests/unit/schema/v1/test_template_builder.py +++ b/tests/unit/schema/v1/test_template_builder.py @@ -224,6 +224,7 @@ def test_coordinate_with_attributes() -> None: # assert cdp_var.metadata.attributes["MGA"] == 51 # noqa: PLR2004 assert cdp_var.metadata[0].attributes["MGA"] == 51 # noqa: PLR2004 + def test_coordinate_with_full_metadata() -> None: """Test adding coordinates with all metadata.""" builder = MDIODatasetBuilder("test_dataset") From 006f6146b8619e12adbf629044f25951856deec6 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Mon, 2 Jun 2025 13:48:52 +0000 Subject: [PATCH 55/55] Fix example run --- docs/tutorials/builder.ipynb | 69 ++++++++++++------------------------ 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/docs/tutorials/builder.ipynb b/docs/tutorials/builder.ipynb index f6667f5e..30b397cf 100644 --- a/docs/tutorials/builder.ipynb +++ b/docs/tutorials/builder.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "1c00c220", "metadata": {}, "outputs": [], @@ -48,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "35505bee", "metadata": {}, "outputs": [], @@ -103,22 +103,7 @@ "execution_count": null, "id": "7d2da0c3", "metadata": {}, - "outputs": [ - { - "ename": "ValidationError", - "evalue": "2 validation errors for Coordinate\nmetadata.0.AllUnits.attributes\n Extra inputs are not permitted [type=extra_forbidden, input_value={'MGA': 51}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden\nmetadata.0.UserAttributes.unitsV1\n Extra inputs are not permitted 
[type=extra_forbidden, input_value={'length': 'm'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mValidationError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# CDP X and Y on inline-crossline grid\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43madd_coordinate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 3\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mcdp_x\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 4\u001b[39m \u001b[43m \u001b[49m\u001b[43mdimensions\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43minline\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mcrossline\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mCDP X (UTM Easting)\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mScalarType\u001b[49m\u001b[43m.\u001b[49m\u001b[43mFLOAT64\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43munitsV1\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mlength\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mm\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mattributes\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mMGA\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m51\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[43m)\u001b[49m.add_coordinate(\n\u001b[32m 9\u001b[39m name=\u001b[33m'\u001b[39m\u001b[33mcdp_y\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 10\u001b[39m dimensions=[\u001b[33m'\u001b[39m\u001b[33minline\u001b[39m\u001b[33m'\u001b[39m,\u001b[33m'\u001b[39m\u001b[33mcrossline\u001b[39m\u001b[33m'\u001b[39m],\n\u001b[32m 11\u001b[39m long_name=\u001b[33m'\u001b[39m\u001b[33mCDP Y (UTM Northing)\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 12\u001b[39m data_type=ScalarType.FLOAT64,\n\u001b[32m 13\u001b[39m metadata={\n\u001b[32m 14\u001b[39m \u001b[33m'\u001b[39m\u001b[33munitsV1\u001b[39m\u001b[33m'\u001b[39m: {\u001b[33m'\u001b[39m\u001b[33mlength\u001b[39m\u001b[33m'\u001b[39m: \u001b[33m'\u001b[39m\u001b[33mm\u001b[39m\u001b[33m'\u001b[39m},\n\u001b[32m 15\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mattributes\u001b[39m\u001b[33m\"\u001b[39m: {\u001b[33m\"\u001b[39m\u001b[33mMGA\u001b[39m\u001b[33m\"\u001b[39m: \u001b[32m51\u001b[39m}\n\u001b[32m 16\u001b[39m },\n\u001b[32m 17\u001b[39m )\n", - "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/src/mdio/core/v1/builder.py:139\u001b[39m, in \u001b[36mMDIODatasetBuilder.add_coordinate\u001b[39m\u001b[34m(self, name, long_name, dimensions, data_type, metadata)\u001b[39m\n\u001b[32m 135\u001b[39m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 136\u001b[39m dim_objects.append(dim)\n\u001b[32m 138\u001b[39m \u001b[38;5;28mself\u001b[39m._coordinates.append(\n\u001b[32m--> \u001b[39m\u001b[32m139\u001b[39m \u001b[43mmake_coordinate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 140\u001b[39m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 141\u001b[39m \u001b[43m \u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlong_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 142\u001b[39m \u001b[43m \u001b[49m\u001b[43mdimensions\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdim_objects\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 143\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 144\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 145\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 146\u001b[39m )\n\u001b[32m 147\u001b[39m \u001b[38;5;28mself\u001b[39m._state = _BuilderState.HAS_COORDINATES\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/src/mdio/core/v1/_serializer.py:57\u001b[39m, in \u001b[36mmake_coordinate\u001b[39m\u001b[34m(name, dimensions, data_type, long_name, metadata)\u001b[39m\n\u001b[32m 49\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Create a Coordinate with the given name, dimensions, data_type, and metadata.\"\"\"\u001b[39;00m\n\u001b[32m 50\u001b[39m coordinate_dict = {\n\u001b[32m 51\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mname\u001b[39m\u001b[33m\"\u001b[39m: name,\n\u001b[32m 52\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mlongName\u001b[39m\u001b[33m\"\u001b[39m: long_name,\n\u001b[32m (...)\u001b[39m\u001b[32m 55\u001b[39m 
\u001b[33m\"\u001b[39m\u001b[33mmetadata\u001b[39m\u001b[33m\"\u001b[39m: metadata,\n\u001b[32m 56\u001b[39m }\n\u001b[32m---> \u001b[39m\u001b[32m57\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mCoordinate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mcoordinate_dict\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m/workspaces/mdio-python/.venv/lib/python3.12/site-packages/pydantic/main.py:212\u001b[39m, in \u001b[36mBaseModel.__init__\u001b[39m\u001b[34m(self, **data)\u001b[39m\n\u001b[32m 210\u001b[39m \u001b[38;5;66;03m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[32m 211\u001b[39m __tracebackhide__ = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m212\u001b[39m validated_self = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mself_instance\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 213\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m validated_self:\n\u001b[32m 214\u001b[39m warnings.warn(\n\u001b[32m 215\u001b[39m \u001b[33m'\u001b[39m\u001b[33mA custom validator is returning a value other than `self`.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m'\u001b[39m\n\u001b[32m 216\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mReturning anything other than `self` from a top level model validator isn\u001b[39m\u001b[33m'\u001b[39m\u001b[33mt supported when validating via `__init__`.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 217\u001b[39m \u001b[33m'\u001b[39m\u001b[33mSee the `model_validator` docs 
(https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.\u001b[39m\u001b[33m'\u001b[39m,\n\u001b[32m 218\u001b[39m category=\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 219\u001b[39m )\n", - "\u001b[31mValidationError\u001b[39m: 2 validation errors for Coordinate\nmetadata.0.AllUnits.attributes\n Extra inputs are not permitted [type=extra_forbidden, input_value={'MGA': 51}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden\nmetadata.0.UserAttributes.unitsV1\n Extra inputs are not permitted [type=extra_forbidden, input_value={'length': 'm'}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden" - ] - } - ], + "outputs": [], "source": [ "# CDP X and Y on inline-crossline grid\n", "builder.add_coordinate(\n", @@ -171,18 +156,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "d7df200f", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/workspaces/mdio-python/src/mdio/core/v1/_overloads.py:32: FutureWarning: zarr_version is deprecated, use zarr_format\n", - " return super().to_zarr(*args, store=store, **kwargs)\n" - ] - }, { "data": { "text/html": [ @@ -568,9 +545,9 @@ " cdp_y (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n", "Attributes:\n", " apiVersion: 1.0.0\n", - " createdOn: 2025-05-29 14:18:21.113904+00:00\n", + " createdOn: 2025-06-02 13:40:42.724997+00:00\n", " name: psdm_stack_example\n", - " attributes: {'description': 'Example PSDM stack'}
  • apiVersion :
    1.0.0
    createdOn :
    2025-06-02 13:40:42.724997+00:00
    name :
    psdm_stack_example
    attributes :
    {'description': 'Example PSDM stack'}
  • " ], "text/plain": [ " Size: 203MB\n", @@ -646,12 +623,12 @@ " cdp_y (inline, crossline) float64 1MB 0.0 0.0 0.0 ... 0.0 0.0 0.0\n", "Attributes:\n", " apiVersion: 1.0.0\n", - " createdOn: 2025-05-29 14:18:21.113904+00:00\n", + " createdOn: 2025-06-02 13:40:42.724997+00:00\n", " name: psdm_stack_example\n", " attributes: {'description': 'Example PSDM stack'}" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -685,7 +662,7 @@ " 'metadata': {\n", " 'name': 'psdm_stack_example',\n", " 'apiVersion': '1.0.0',\n", - " 'createdOn': '2025-05-29T14:18:21.113904Z',\n", + " 'createdOn': '2025-06-02T13:40:42.724997Z',\n", " 'attributes': {'description': 'Example PSDM stack'}\n", " },\n", " 'variables': [\n", @@ -724,14 +701,14 @@ " 'dimensions': [{'name': 'inline', 'size': 256}, {'name': 'crossline', 'size': 512}],\n", " 'name': 'cdp_x',\n", " 'longName': 'CDP X (UTM Easting)',\n", - " 'metadata': {'unitsV1': {'length': 'm'}}\n", + " 'metadata': {'unitsV1': {'length': 'm'}, 'attributes': {'attributes': {'MGA': 51}}}\n", " },\n", " {\n", " 'dataType': 'float64',\n", " 'dimensions': [{'name': 'inline', 'size': 256}, {'name': 'crossline', 'size': 512}],\n", " 'name': 'cdp_y',\n", " 'longName': 'CDP Y (UTM Northing)',\n", - " 'metadata': {'unitsV1': {'length': 'm'}}\n", + " 'metadata': {'unitsV1': {'length': 'm'}, 'attributes': {'attributes': {'MGA': 51}}}\n", " }\n", " ]\n", "}\n", @@ -742,7 +719,7 @@ " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\n", " \u001b[32m'name'\u001b[0m: \u001b[32m'psdm_stack_example'\u001b[0m,\n", " \u001b[32m'apiVersion'\u001b[0m: \u001b[32m'1.0.0'\u001b[0m,\n", - " \u001b[32m'createdOn'\u001b[0m: \u001b[32m'2025-05-29T14:18:21.113904Z'\u001b[0m,\n", + " \u001b[32m'createdOn'\u001b[0m: \u001b[32m'2025-06-02T13:40:42.724997Z'\u001b[0m,\n", " \u001b[32m'attributes'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'description'\u001b[0m: \u001b[32m'Example PSDM stack'\u001b[0m\u001b[1m}\u001b[0m\n", 
" \u001b[1m}\u001b[0m,\n", " \u001b[32m'variables'\u001b[0m: \u001b[1m[\u001b[0m\n", @@ -781,14 +758,14 @@ " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", " \u001b[32m'name'\u001b[0m: \u001b[32m'cdp_x'\u001b[0m,\n", " \u001b[32m'longName'\u001b[0m: \u001b[32m'CDP X \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUTM Easting\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m,\n", - " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: \u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: \u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'attributes'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'attributes'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'MGA'\u001b[0m: \u001b[1;36m51\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", " \u001b[1m}\u001b[0m,\n", " \u001b[1m{\u001b[0m\n", " \u001b[32m'dataType'\u001b[0m: \u001b[32m'float64'\u001b[0m,\n", " \u001b[32m'dimensions'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'inline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m256\u001b[0m\u001b[1m}\u001b[0m, \u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'crossline'\u001b[0m, \u001b[32m'size'\u001b[0m: \u001b[1;36m512\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m,\n", " \u001b[32m'name'\u001b[0m: \u001b[32m'cdp_y'\u001b[0m,\n", " \u001b[32m'longName'\u001b[0m: \u001b[32m'CDP Y \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUTM Northing\u001b[0m\u001b[32m)\u001b[0m\u001b[32m'\u001b[0m,\n", - " 
\u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: \u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", + " \u001b[32m'metadata'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'unitsV1'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'length'\u001b[0m: \u001b[32m'm'\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m'attributes'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'attributes'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'MGA'\u001b[0m: \u001b[1;36m51\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n", " \u001b[1m}\u001b[0m\n", " \u001b[1m]\u001b[0m\n", "\u001b[1m}\u001b[0m\n" @@ -817,9 +794,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python (mdio-dev)", + "display_name": "jupyter", "language": "python", - "name": "mdio-dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -831,7 +808,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.9" + "version": "3.13.3" } }, "nbformat": 4,