diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml index 85f3890f7be5..4952966e8b3e 100644 --- a/.github/workflows/python-integration-tests.yml +++ b/.github/workflows/python-integration-tests.yml @@ -131,6 +131,7 @@ jobs: VERTEX_AI_PROJECT_ID: ${{ vars.VERTEX_AI_PROJECT_ID }} VERTEX_AI_GEMINI_MODEL_ID: ${{ vars.VERTEX_AI_GEMINI_MODEL_ID }} VERTEX_AI_EMBEDDING_MODEL_ID: ${{ vars.VERTEX_AI_EMBEDDING_MODEL_ID }} + REDIS_CONNECTION_STRING: ${{ vars.REDIS_CONNECTION_STRING }} run: | cd python poetry run pytest ./tests/integration ./tests/samples -v --junitxml=pytest.xml @@ -242,6 +243,7 @@ jobs: VERTEX_AI_PROJECT_ID: ${{ vars.VERTEX_AI_PROJECT_ID }} VERTEX_AI_GEMINI_MODEL_ID: ${{ vars.VERTEX_AI_GEMINI_MODEL_ID }} VERTEX_AI_EMBEDDING_MODEL_ID: ${{ vars.VERTEX_AI_EMBEDDING_MODEL_ID }} + REDIS_CONNECTION_STRING: ${{ vars.REDIS_CONNECTION_STRING }} run: | if ${{ matrix.os == 'ubuntu-latest' }}; then docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest diff --git a/python/.coveragerc b/python/.coveragerc index 521b2ffe70c9..263f05cd0111 100644 --- a/python/.coveragerc +++ b/python/.coveragerc @@ -10,8 +10,8 @@ omit = semantic_kernel/connectors/memory/mongodb_atlas/* semantic_kernel/connectors/memory/pinecone/* semantic_kernel/connectors/memory/postgres/* - semantic_kernel/connectors/memory/qdrant/* - semantic_kernel/connectors/memory/redis/* + semantic_kernel/connectors/memory/qdrant/qdrant_memory_store.py + semantic_kernel/connectors/memory/redis/redis_memory_store.py semantic_kernel/connectors/memory/usearch/* semantic_kernel/connectors/memory/weaviate/* semantic_kernel/reliability/* @@ -33,4 +33,4 @@ exclude_lines = # TYPE_CHECKING and @overload blocks are never executed during pytest run if TYPE_CHECKING: @overload - @abstractmethod \ No newline at end of file + @abstractmethod diff --git a/python/.cspell.json b/python/.cspell.json index 3b3b3c06d526..5d741a349537 100644 --- a/python/.cspell.json +++ b/python/.cspell.json @@ -47,6 +47,10 @@ "protos", "endregion", "vertexai", - "aiplatform" + "aiplatform", + "serde", + "datamodel", + "vectorstoremodel", + "qdrant" ] } \ No newline at end of file diff --git a/python/mypy.ini b/python/mypy.ini index 6a30f83bd145..9f392f90a3ab 100644 --- a/python/mypy.ini +++ b/python/mypy.ini @@ -26,6 +26,8 @@ ignore_errors = true [mypy-semantic_kernel.connectors.memory.astradb.*] ignore_errors = true +[mypy-semantic_kernel.connectors.memory.azure_ai_search.*] +ignore_errors = false [mypy-semantic_kernel.connectors.memory.azure_cognitive_search.*] ignore_errors = true @@ -50,9 +52,13 @@ ignore_errors = true [mypy-semantic_kernel.connectors.memory.postgres.*] ignore_errors = true +[mypy-semantic_kernel.connectors.memory.qdrant.qdrant_vector_record_store.*] +ignore_errors = true [mypy-semantic_kernel.connectors.memory.qdrant.*] ignore_errors = true +[mypy-semantic_kernel.connectors.memory.redis.redis_vector_record_store.*] +ignore_errors = true [mypy-semantic_kernel.connectors.memory.redis.*] ignore_errors = true diff --git a/python/poetry.lock b/python/poetry.lock index cbfbeef48c5e..bb3f03a317a0 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -1,19 +1,19 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. 
[[package]] name = "accelerate" -version = "0.31.0" +version = "0.33.0" description = "Accelerate" optional = false python-versions = ">=3.8.0" files = [ - {file = "accelerate-0.31.0-py3-none-any.whl", hash = "sha256:0fc608dc49584f64d04711a39711d73cb0ad4ef3d21cddee7ef2216e29471144"}, - {file = "accelerate-0.31.0.tar.gz", hash = "sha256:b5199865b26106ccf9205acacbe8e4b3b428ad585e7c472d6a46f6fb75b6c176"}, + {file = "accelerate-0.33.0-py3-none-any.whl", hash = "sha256:0a7f33d60ba09afabd028d4f0856dd19c5a734b7a596d637d9dd6e3d0eadbaf3"}, + {file = "accelerate-0.33.0.tar.gz", hash = "sha256:11ba481ed6ea09191775df55ce464aeeba67a024bd0261a44b77b30fb439e26a"}, ] [package.dependencies] -huggingface-hub = "*" -numpy = ">=1.17" +huggingface-hub = ">=0.21.0" +numpy = ">=1.17,<2.0.0" packaging = ">=20.0" psutil = "*" pyyaml = "*" @@ -1946,6 +1946,124 @@ files = [ hpack = ">=4.0,<5" hyperframe = ">=6.0,<7" +[[package]] +name = "hiredis" +version = "2.3.2" +description = "Python wrapper for hiredis" +optional = false +python-versions = ">=3.7" +files = [ + {file = "hiredis-2.3.2-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:742093f33d374098aa21c1696ac6e4874b52658c870513a297a89265a4d08fe5"}, + {file = "hiredis-2.3.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:9e14fb70ca4f7efa924f508975199353bf653f452e4ef0a1e47549e208f943d7"}, + {file = "hiredis-2.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d7302b4b17fcc1cc727ce84ded7f6be4655701e8d58744f73b09cb9ed2b13df"}, + {file = "hiredis-2.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed63e8b75c193c5e5a8288d9d7b011da076cc314fafc3bfd59ec1d8a750d48c8"}, + {file = "hiredis-2.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b4edee59dc089bc3948f4f6fba309f51aa2ccce63902364900aa0a553a85e97"}, + {file = "hiredis-2.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6481c3b7673a86276220140456c2a6fbfe8d1fb5c613b4728293c8634134824"}, + {file = "hiredis-2.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:684840b014ce83541a087fcf2d48227196576f56ae3e944d4dfe14c0a3e0ccb7"}, + {file = "hiredis-2.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c4c0bcf786f0eac9593367b6279e9b89534e008edbf116dcd0de956524702c8"}, + {file = "hiredis-2.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66ab949424ac6504d823cba45c4c4854af5c59306a1531edb43b4dd22e17c102"}, + {file = "hiredis-2.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:322c668ee1c12d6c5750a4b1057e6b4feee2a75b3d25d630922a463cfe5e7478"}, + {file = "hiredis-2.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:bfa73e3f163c6e8b2ec26f22285d717a5f77ab2120c97a2605d8f48b26950dac"}, + {file = "hiredis-2.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:7f39f28ffc65de577c3bc0c7615f149e35bc927802a0f56e612db9b530f316f9"}, + {file = "hiredis-2.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:55ce31bf4711da879b96d511208efb65a6165da4ba91cb3a96d86d5a8d9d23e6"}, + {file = "hiredis-2.3.2-cp310-cp310-win32.whl", hash = "sha256:3dd63d0bbbe75797b743f35d37a4cca7ca7ba35423a0de742ae2985752f20c6d"}, + {file = "hiredis-2.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:ea002656a8d974daaf6089863ab0a306962c8b715db6b10879f98b781a2a5bf5"}, + {file = "hiredis-2.3.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:adfbf2e9c38b77d0db2fb32c3bdaea638fa76b4e75847283cd707521ad2475ef"}, + {file = 
"hiredis-2.3.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:80b02d27864ebaf9b153d4b99015342382eeaed651f5591ce6f07e840307c56d"}, + {file = "hiredis-2.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd40d2e2f82a483de0d0a6dfd8c3895a02e55e5c9949610ecbded18188fd0a56"}, + {file = "hiredis-2.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfa904045d7cebfb0f01dad51352551cce1d873d7c3f80c7ded7d42f8cac8f89"}, + {file = "hiredis-2.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:28bd184b33e0dd6d65816c16521a4ba1ffbe9ff07d66873c42ea4049a62fed83"}, + {file = "hiredis-2.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f70481213373d44614148f0f2e38e7905be3f021902ae5167289413196de4ba4"}, + {file = "hiredis-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb8797b528c1ff81eef06713623562b36db3dafa106b59f83a6468df788ff0d1"}, + {file = "hiredis-2.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02fc71c8333586871602db4774d3a3e403b4ccf6446dc4603ec12df563127cee"}, + {file = "hiredis-2.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0da56915bda1e0a49157191b54d3e27689b70960f0685fdd5c415dacdee2fbed"}, + {file = "hiredis-2.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e2674a5a3168349435b08fa0b82998ed2536eb9acccf7087efe26e4cd088a525"}, + {file = "hiredis-2.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:dc1c3fd49930494a67dcec37d0558d99d84eca8eb3f03b17198424538f2608d7"}, + {file = "hiredis-2.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:14c7b43205e515f538a9defb4e411e0f0576caaeeda76bb9993ed505486f7562"}, + {file = "hiredis-2.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7bac7e02915b970c3723a7a7c5df4ba7a11a3426d2a3f181e041aa506a1ff028"}, + {file = "hiredis-2.3.2-cp311-cp311-win32.whl", hash = "sha256:63a090761ddc3c1f7db5e67aa4e247b4b3bb9890080bdcdadd1b5200b8b89ac4"}, + {file = "hiredis-2.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:70d226ab0306a5b8d408235cabe51d4bf3554c9e8a72d53ce0b3c5c84cf78881"}, + {file = "hiredis-2.3.2-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:5c614552c6bd1d0d907f448f75550f6b24fb56cbfce80c094908b7990cad9702"}, + {file = "hiredis-2.3.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9c431431abf55b64347ddc8df68b3ef840269cb0aa5bc2d26ad9506eb4b1b866"}, + {file = "hiredis-2.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a45857e87e9d2b005e81ddac9d815a33efd26ec67032c366629f023fe64fb415"}, + {file = "hiredis-2.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e138d141ec5a6ec800b6d01ddc3e5561ce1c940215e0eb9960876bfde7186aae"}, + {file = "hiredis-2.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:387f655444d912a963ab68abf64bf6e178a13c8e4aa945cb27388fd01a02e6f1"}, + {file = "hiredis-2.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4852f4bf88f0e2d9bdf91279892f5740ed22ae368335a37a52b92a5c88691140"}, + {file = "hiredis-2.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d711c107e83117129b7f8bd08e9820c43ceec6204fff072a001fd82f6d13db9f"}, + {file = "hiredis-2.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92830c16885f29163e1c2da1f3c1edb226df1210ec7e8711aaabba3dd0d5470a"}, + {file = "hiredis-2.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:16b01d9ceae265d4ab9547be0cd628ecaff14b3360357a9d30c029e5ae8b7e7f"}, + {file = "hiredis-2.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:5986fb5f380169270a0293bebebd95466a1c85010b4f1afc2727e4d17c452512"}, + {file = "hiredis-2.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:49532d7939cc51f8e99efc326090c54acf5437ed88b9c904cc8015b3c4eda9c9"}, + {file = "hiredis-2.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:8f34801b251ca43ad70691fb08b606a2e55f06b9c9fb1fc18fd9402b19d70f7b"}, + {file = "hiredis-2.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7298562a49d95570ab1c7fc4051e72824c6a80e907993a21a41ba204223e7334"}, + {file = "hiredis-2.3.2-cp312-cp312-win32.whl", hash = "sha256:e1d86b75de787481b04d112067a4033e1ecfda2a060e50318a74e4e1c9b2948c"}, + {file = "hiredis-2.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:6dbfe1887ffa5cf3030451a56a8f965a9da2fa82b7149357752b67a335a05fc6"}, + {file = "hiredis-2.3.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:4fc242e9da4af48714199216eb535b61e8f8d66552c8819e33fc7806bd465a09"}, + {file = "hiredis-2.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e81aa4e9a1fcf604c8c4b51aa5d258e195a6ba81efe1da82dea3204443eba01c"}, + {file = "hiredis-2.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:419780f8583ddb544ffa86f9d44a7fcc183cd826101af4e5ffe535b6765f5f6b"}, + {file = "hiredis-2.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6871306d8b98a15e53a5f289ec1106a3a1d43e7ab6f4d785f95fcef9a7bd9504"}, + {file = "hiredis-2.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88cb0b35b63717ef1e41d62f4f8717166f7c6245064957907cfe177cc144357c"}, + {file = "hiredis-2.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c490191fa1218851f8a80c5a21a05a6f680ac5aebc2e688b71cbfe592f8fec6"}, + {file = "hiredis-2.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:4baf4b579b108062e91bd2a991dc98b9dc3dc06e6288db2d98895eea8acbac22"}, + {file = "hiredis-2.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e627d8ef5e100556e09fb44c9571a432b10e11596d3c4043500080ca9944a91a"}, + {file = "hiredis-2.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:ba3dc0af0def8c21ce7d903c59ea1e8ec4cb073f25ece9edaec7f92a286cd219"}, + {file = "hiredis-2.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:56e9b7d6051688ca94e68c0c8a54a243f8db841911b683cedf89a29d4de91509"}, + {file = "hiredis-2.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:380e029bb4b1d34cf560fcc8950bf6b57c2ef0c9c8b7c7ac20b7c524a730fadd"}, + {file = "hiredis-2.3.2-cp37-cp37m-win32.whl", hash = "sha256:948d9f2ca7841794dd9b204644963a4bcd69ced4e959b0d4ecf1b8ce994a6daa"}, + {file = "hiredis-2.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:cfa67afe2269b2d203cd1389c00c5bc35a287cd57860441fb0e53b371ea6a029"}, + {file = "hiredis-2.3.2-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:bcbe47da0aebc00a7cfe3ebdcff0373b86ce2b1856251c003e3d69c9db44b5a7"}, + {file = "hiredis-2.3.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f2c9c0d910dd3f7df92f0638e7f65d8edd7f442203caf89c62fc79f11b0b73f8"}, + {file = "hiredis-2.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:01b6c24c0840ac7afafbc4db236fd55f56a9a0919a215c25a238f051781f4772"}, + {file = "hiredis-2.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1f567489f422d40c21e53212a73bef4638d9f21043848150f8544ef1f3a6ad1"}, + {file 
= "hiredis-2.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:28adecb308293e705e44087a1c2d557a816f032430d8a2a9bb7873902a1c6d48"}, + {file = "hiredis-2.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:27e9619847e9dc70b14b1ad2d0fb4889e7ca18996585c3463cff6c951fd6b10b"}, + {file = "hiredis-2.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a0026cfbf29f07649b0e34509091a2a6016ff8844b127de150efce1c3aff60b"}, + {file = "hiredis-2.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9de7586522e5da6bee83c9cf0dcccac0857a43249cb4d721a2e312d98a684d1"}, + {file = "hiredis-2.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e58494f282215fc461b06709e9a195a24c12ba09570f25bdf9efb036acc05101"}, + {file = "hiredis-2.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:de3a32b4b76d46f1eb42b24a918d51d8ca52411a381748196241d59a895f7c5c"}, + {file = "hiredis-2.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:1979334ccab21a49c544cd1b8d784ffb2747f99a51cb0bd0976eebb517628382"}, + {file = "hiredis-2.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:0c0773266e1c38a06e7593bd08870ac1503f5f0ce0f5c63f2b4134b090b5d6a4"}, + {file = "hiredis-2.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bd1cee053416183adcc8e6134704c46c60c3f66b8faaf9e65bf76191ca59a2f7"}, + {file = "hiredis-2.3.2-cp38-cp38-win32.whl", hash = "sha256:5341ce3d01ef3c7418a72e370bf028c7aeb16895e79e115fe4c954fff990489e"}, + {file = "hiredis-2.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:8fc7197ff33047ce43a67851ccf190acb5b05c52fd4a001bb55766358f04da68"}, + {file = "hiredis-2.3.2-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:f47775e27388b58ce52f4f972f80e45b13c65113e9e6b6bf60148f893871dc9b"}, + {file = "hiredis-2.3.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:9412a06b8a8e09abd6313d96864b6d7713c6003a365995a5c70cfb9209df1570"}, + {file = "hiredis-2.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3020b60e3fc96d08c2a9b011f1c2e2a6bdcc09cb55df93c509b88be5cb791df"}, + {file = "hiredis-2.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53d0f2c59bce399b8010a21bc779b4f8c32d0f582b2284ac8c98dc7578b27bc4"}, + {file = "hiredis-2.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57c0d0c7e308ed5280a4900d4468bbfec51f0e1b4cde1deae7d4e639bc6b7766"}, + {file = "hiredis-2.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d63318ca189fddc7e75f6a4af8eae9c0545863619fb38cfba5f43e81280b286"}, + {file = "hiredis-2.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e741ffe4e2db78a1b9dd6e5d29678ce37fbaaf65dfe132e5b82a794413302ef1"}, + {file = "hiredis-2.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb98038ccd368e0d88bd92ee575c58cfaf33e77f788c36b2a89a84ee1936dc6b"}, + {file = "hiredis-2.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:eae62ed60d53b3561148bcd8c2383e430af38c0deab9f2dd15f8874888ffd26f"}, + {file = "hiredis-2.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ca33c175c1cf60222d9c6d01c38fc17ec3a484f32294af781de30226b003e00f"}, + {file = "hiredis-2.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c5f6972d2bdee3cd301d5c5438e31195cf1cabf6fd9274491674d4ceb46914d"}, + {file = "hiredis-2.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = 
"sha256:a6b54dabfaa5dbaa92f796f0c32819b4636e66aa8e9106c3d421624bd2a2d676"}, + {file = "hiredis-2.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e96cd35df012a17c87ae276196ea8f215e77d6eeca90709eb03999e2d5e3fd8a"}, + {file = "hiredis-2.3.2-cp39-cp39-win32.whl", hash = "sha256:63b99b5ea9fe4f21469fb06a16ca5244307678636f11917359e3223aaeca0b67"}, + {file = "hiredis-2.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:a50c8af811b35b8a43b1590cf890b61ff2233225257a3cad32f43b3ec7ff1b9f"}, + {file = "hiredis-2.3.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7e8bf4444b09419b77ce671088db9f875b26720b5872d97778e2545cd87dba4a"}, + {file = "hiredis-2.3.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bd42d0d45ea47a2f96babd82a659fbc60612ab9423a68e4a8191e538b85542a"}, + {file = "hiredis-2.3.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80441b55edbef868e2563842f5030982b04349408396e5ac2b32025fb06b5212"}, + {file = "hiredis-2.3.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec444ab8f27562a363672d6a7372bc0700a1bdc9764563c57c5f9efa0e592b5f"}, + {file = "hiredis-2.3.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f9f606e810858207d4b4287b4ef0dc622c2aa469548bf02b59dcc616f134f811"}, + {file = "hiredis-2.3.2-pp37-pypy37_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c3dde4ca00fe9eee3b76209711f1941bb86db42b8a75d7f2249ff9dfc026ab0e"}, + {file = "hiredis-2.3.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4dd676107a1d3c724a56a9d9db38166ad4cf44f924ee701414751bd18a784a0"}, + {file = "hiredis-2.3.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce42649e2676ad783186264d5ffc788a7612ecd7f9effb62d51c30d413a3eefe"}, + {file = "hiredis-2.3.2-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e3f8b1733078ac663dad57e20060e16389a60ab542f18a97931f3a2a2dd64a4"}, + {file = "hiredis-2.3.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:532a84a82156a82529ec401d1c25d677c6543c791e54a263aa139541c363995f"}, + {file = "hiredis-2.3.2-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4d59f88c4daa36b8c38e59ac7bffed6f5d7f68eaccad471484bf587b28ccc478"}, + {file = "hiredis-2.3.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a91a14dd95e24dc078204b18b0199226ee44644974c645dc54ee7b00c3157330"}, + {file = "hiredis-2.3.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb777a38797c8c7df0444533119570be18d1a4ce5478dffc00c875684df7bfcb"}, + {file = "hiredis-2.3.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d47c915897a99d0d34a39fad4be97b4b709ab3d0d3b779ebccf2b6024a8c681e"}, + {file = "hiredis-2.3.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:333b5e04866758b11bda5f5315b4e671d15755fc6ed3b7969721bc6311d0ee36"}, + {file = "hiredis-2.3.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c8937f1100435698c18e4da086968c4b5d70e86ea718376f833475ab3277c9aa"}, + {file = "hiredis-2.3.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa45f7d771094b8145af10db74704ab0f698adb682fbf3721d8090f90e42cc49"}, + {file = "hiredis-2.3.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33d5ebc93c39aed4b5bc769f8ce0819bc50e74bb95d57a35f838f1c4378978e0"}, + {file = 
"hiredis-2.3.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a797d8c7df9944314d309b0d9e1b354e2fa4430a05bb7604da13b6ad291bf959"}, + {file = "hiredis-2.3.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e15a408f71a6c8c87b364f1f15a6cd9c1baca12bbc47a326ac8ab99ec7ad3c64"}, + {file = "hiredis-2.3.2.tar.gz", hash = "sha256:733e2456b68f3f126ddaf2cd500a33b25146c3676b97ea843665717bda0c5d43"}, +] + [[package]] name = "hpack" version = "4.0.0" @@ -2194,20 +2312,6 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -[[package]] -name = "intel-openmp" -version = "2021.4.0" -description = "Intel OpenMP* Runtime Library" -optional = false -python-versions = "*" -files = [ - {file = "intel_openmp-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:41c01e266a7fdb631a7609191709322da2bbf24b252ba763f125dd651bcc7675"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:3b921236a38384e2016f0f3d65af6732cf2c12918087128a9163225451e776f2"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:e2240ab8d01472fed04f3544a878cda5da16c26232b7ea1b59132dbfb48b186e"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:6e863d8fd3d7e8ef389d52cf97a50fe2afe1a19247e8c0d168ce021546f96fc9"}, - {file = "intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:eef4c8bcc8acefd7f5cd3b9384dbf73d59e2c99fc56545712ded913f43c4a94f"}, -] - [[package]] name = "ipykernel" version = "6.29.4" @@ -2819,24 +2923,6 @@ files = [ {file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"}, ] -[[package]] -name = "mkl" -version = "2021.4.0" -description = "IntelĀ® oneAPI Math Kernel Library" -optional = false -python-versions = "*" -files = [ - {file = "mkl-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:67460f5cd7e30e405b54d70d1ed3ca78118370b65f7327d495e9c8847705e2fb"}, - {file = "mkl-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:636d07d90e68ccc9630c654d47ce9fdeb036bb46e2b193b3a9ac8cfea683cce5"}, - {file = "mkl-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:398dbf2b0d12acaf54117a5210e8f191827f373d362d796091d161f610c1ebfb"}, - {file = "mkl-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:439c640b269a5668134e3dcbcea4350459c4a8bc46469669b2d67e07e3d330e8"}, - {file = "mkl-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:ceef3cafce4c009dd25f65d7ad0d833a0fbadc3d8903991ec92351fe5de1e718"}, -] - -[package.dependencies] -intel-openmp = "==2021.*" -tbb = "==2021.*" - [[package]] name = "mmh3" version = "4.1.0" @@ -3511,25 +3597,24 @@ nvidia-nvjitlink-cu12 = "*" [[package]] name = "nvidia-nccl-cu12" -version = "2.20.5" +version = "2.19.3" description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = false python-versions = ">=3" files = [ - {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, - {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"}, + {file = "nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", hash = "sha256:a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d"}, ] [[package]] name = "nvidia-nvjitlink-cu12" -version = "12.5.40" 
+version = "12.5.82" description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" files = [ - {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"}, - {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"}, - {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"}, + {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_aarch64.whl", hash = "sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27"}, + {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f9b37bc5c8cf7509665cb6ada5aaa0ce65618f2332b7d3e78e9790511f111212"}, + {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-win_amd64.whl", hash = "sha256:e782564d705ff0bf61ac3e1bf730166da66dd2fe9012f111ede5fc49b64ae697"}, ] [[package]] @@ -5232,7 +5317,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -5396,17 +5480,18 @@ fastembed-gpu = ["fastembed-gpu (==0.2.7)"] [[package]] name = "redis" -version = "4.6.0" +version = "5.0.7" description = "Python client for Redis database and key-value store" optional = false python-versions = ">=3.7" files = [ - {file = "redis-4.6.0-py3-none-any.whl", hash = "sha256:e2b03db868160ee4591de3cb90d40ebb50a90dd302138775937f6a42b7ed183c"}, - {file = "redis-4.6.0.tar.gz", hash = "sha256:585dc516b9eb042a619ef0a39c3d7d55fe81bdb4df09a52c9cdde0d07bf1aa7d"}, + {file = "redis-5.0.7-py3-none-any.whl", hash = "sha256:0e479e24da960c690be5d9b96d21f7b918a98c0cf49af3b6fafaa0753f93a0db"}, + {file = "redis-5.0.7.tar.gz", hash = "sha256:8f611490b93c8109b50adc317b31bfd84fff31def3475b92e7e80bf39f48175b"}, ] [package.dependencies] -async-timeout = {version = ">=4.0.2", markers = "python_full_version <= \"3.11.2\""} +async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\""} +hiredis = {version = ">=1.0.0", optional = true, markers = "extra == \"hiredis\""} [package.extras] hiredis = ["hiredis (>=1.0.0)"] @@ -6238,19 +6323,6 @@ files = [ [package.dependencies] mpmath = ">=0.19" -[[package]] -name = "tbb" -version = "2021.12.0" -description = "IntelĀ® oneAPI Threading Building Blocks (oneTBB)" -optional = false -python-versions = "*" -files = [ - {file = "tbb-2021.12.0-py2.py3-none-manylinux1_i686.whl", hash = 
"sha256:f2cc9a7f8ababaa506cbff796ce97c3bf91062ba521e15054394f773375d81d8"}, - {file = "tbb-2021.12.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:a925e9a7c77d3a46ae31c34b0bb7f801c4118e857d137b68f68a8e458fcf2bd7"}, - {file = "tbb-2021.12.0-py3-none-win32.whl", hash = "sha256:b1725b30c174048edc8be70bd43bb95473f396ce895d91151a474d0fa9f450a8"}, - {file = "tbb-2021.12.0-py3-none-win_amd64.whl", hash = "sha256:fc2772d850229f2f3df85f1109c4844c495a2db7433d38200959ee9265b34789"}, -] - [[package]] name = "tenacity" version = "8.3.0" @@ -6425,38 +6497,42 @@ files = [ [[package]] name = "torch" -version = "2.3.1" +version = "2.2.2" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false python-versions = ">=3.8.0" files = [ - {file = "torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:605a25b23944be5ab7c3467e843580e1d888b8066e5aaf17ff7bf9cc30001cc3"}, - {file = "torch-2.3.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f2357eb0965583a0954d6f9ad005bba0091f956aef879822274b1bcdb11bd308"}, - {file = "torch-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:32b05fe0d1ada7f69c9f86c14ff69b0ef1957a5a54199bacba63d22d8fab720b"}, - {file = "torch-2.3.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7c09a94362778428484bcf995f6004b04952106aee0ef45ff0b4bab484f5498d"}, - {file = "torch-2.3.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:b2ec81b61bb094ea4a9dee1cd3f7b76a44555375719ad29f05c0ca8ef596ad39"}, - {file = "torch-2.3.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:490cc3d917d1fe0bd027057dfe9941dc1d6d8e3cae76140f5dd9a7e5bc7130ab"}, - {file = "torch-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:5802530783bd465fe66c2df99123c9a54be06da118fbd785a25ab0a88123758a"}, - {file = "torch-2.3.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:a7dd4ed388ad1f3d502bf09453d5fe596c7b121de7e0cfaca1e2017782e9bbac"}, - {file = "torch-2.3.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:a486c0b1976a118805fc7c9641d02df7afbb0c21e6b555d3bb985c9f9601b61a"}, - {file = "torch-2.3.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:224259821fe3e4c6f7edf1528e4fe4ac779c77addaa74215eb0b63a5c474d66c"}, - {file = "torch-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:e5fdccbf6f1334b2203a61a0e03821d5845f1421defe311dabeae2fc8fbeac2d"}, - {file = "torch-2.3.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:3c333dc2ebc189561514eda06e81df22bf8fb64e2384746b2cb9f04f96d1d4c8"}, - {file = "torch-2.3.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:07e9ba746832b8d069cacb45f312cadd8ad02b81ea527ec9766c0e7404bb3feb"}, - {file = "torch-2.3.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:462d1c07dbf6bb5d9d2f3316fee73a24f3d12cd8dacf681ad46ef6418f7f6626"}, - {file = "torch-2.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:ff60bf7ce3de1d43ad3f6969983f321a31f0a45df3690921720bcad6a8596cc4"}, - {file = "torch-2.3.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:bee0bd33dc58aa8fc8a7527876e9b9a0e812ad08122054a5bff2ce5abf005b10"}, - {file = "torch-2.3.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:aaa872abde9a3d4f91580f6396d54888620f4a0b92e3976a6034759df4b961ad"}, - {file = "torch-2.3.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:3d7a7f7ef21a7520510553dc3938b0c57c116a7daee20736a9e25cbc0e832bdc"}, - {file = "torch-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:4777f6cefa0c2b5fa87223c213e7b6f417cf254a45e5829be4ccd1b2a4ee1011"}, - {file = "torch-2.3.1-cp39-none-macosx_11_0_arm64.whl", hash = 
"sha256:2bb5af780c55be68fe100feb0528d2edebace1d55cb2e351de735809ba7391eb"}, + {file = "torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:bc889d311a855dd2dfd164daf8cc903a6b7273a747189cebafdd89106e4ad585"}, + {file = "torch-2.2.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:15dffa4cc3261fa73d02f0ed25f5fa49ecc9e12bf1ae0a4c1e7a88bbfaad9030"}, + {file = "torch-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:11e8fe261233aeabd67696d6b993eeb0896faa175c6b41b9a6c9f0334bdad1c5"}, + {file = "torch-2.2.2-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:b2e2200b245bd9f263a0d41b6a2dab69c4aca635a01b30cca78064b0ef5b109e"}, + {file = "torch-2.2.2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:877b3e6593b5e00b35bbe111b7057464e76a7dd186a287280d941b564b0563c2"}, + {file = "torch-2.2.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:ad4c03b786e074f46606f4151c0a1e3740268bcf29fbd2fdf6666d66341c1dcb"}, + {file = "torch-2.2.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:32827fa1fbe5da8851686256b4cd94cc7b11be962862c2293811c94eea9457bf"}, + {file = "torch-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:f9ef0a648310435511e76905f9b89612e45ef2c8b023bee294f5e6f7e73a3e7c"}, + {file = "torch-2.2.2-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:95b9b44f3bcebd8b6cd8d37ec802048c872d9c567ba52c894bba90863a439059"}, + {file = "torch-2.2.2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:49aa4126ede714c5aeef7ae92969b4b0bbe67f19665106463c39f22e0a1860d1"}, + {file = "torch-2.2.2-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:cf12cdb66c9c940227ad647bc9cf5dba7e8640772ae10dfe7569a0c1e2a28aca"}, + {file = "torch-2.2.2-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:89ddac2a8c1fb6569b90890955de0c34e1724f87431cacff4c1979b5f769203c"}, + {file = "torch-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:451331406b760f4b1ab298ddd536486ab3cfb1312614cfe0532133535be60bea"}, + {file = "torch-2.2.2-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:eb4d6e9d3663e26cd27dc3ad266b34445a16b54908e74725adb241aa56987533"}, + {file = "torch-2.2.2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:bf9558da7d2bf7463390b3b2a61a6a3dbb0b45b161ee1dd5ec640bf579d479fc"}, + {file = "torch-2.2.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cd2bf7697c9e95fb5d97cc1d525486d8cf11a084c6af1345c2c2c22a6b0029d0"}, + {file = "torch-2.2.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b421448d194496e1114d87a8b8d6506bce949544e513742b097e2ab8f7efef32"}, + {file = "torch-2.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:3dbcd563a9b792161640c0cffe17e3270d85e8f4243b1f1ed19cca43d28d235b"}, + {file = "torch-2.2.2-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:31f4310210e7dda49f1fb52b0ec9e59382cfcb938693f6d5378f25b43d7c1d29"}, + {file = "torch-2.2.2-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:c795feb7e8ce2e0ef63f75f8e1ab52e7fd5e1a4d7d0c31367ade1e3de35c9e95"}, + {file = "torch-2.2.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:a6e5770d68158d07456bfcb5318b173886f579fdfbf747543901ce718ea94782"}, + {file = "torch-2.2.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:67dcd726edff108e2cd6c51ff0e416fd260c869904de95750e80051358680d24"}, + {file = "torch-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:539d5ef6c4ce15bd3bd47a7b4a6e7c10d49d4d21c0baaa87c7d2ef8698632dfb"}, + {file = "torch-2.2.2-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:dff696de90d6f6d1e8200e9892861fd4677306d0ef604cb18f2134186f719f82"}, + {file = "torch-2.2.2-cp39-none-macosx_11_0_arm64.whl", hash = 
"sha256:3a4dd910663fd7a124c056c878a52c2b0be4a5a424188058fe97109d4436ee42"}, ] [package.dependencies] filelock = "*" fsspec = "*" jinja2 = "*" -mkl = {version = ">=2021.1.1,<=2021.4.0", markers = "platform_system == \"Windows\""} networkx = "*" nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} @@ -6467,10 +6543,10 @@ nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linu nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nccl-cu12 = {version = "2.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} sympy = "*" -triton = {version = "2.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} +triton = {version = "2.2.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} typing-extensions = ">=4.8.0" [package.extras] @@ -6604,17 +6680,17 @@ vision = ["Pillow (>=10.0.1,<=15.0)"] [[package]] name = "triton" -version = "2.3.1" +version = "2.2.0" description = "A language and compiler for custom Deep Learning operations" optional = false python-versions = "*" files = [ - {file = "triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33"}, - {file = "triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e"}, - {file = "triton-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf80e8761a9e3498aa92e7bf83a085b31959c61f5e8ac14eedd018df6fccd10"}, - {file = "triton-2.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b13bf35a2b659af7159bf78e92798dc62d877aa991de723937329e2d382f1991"}, - {file = "triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63381e35ded3304704ea867ffde3b7cfc42c16a55b3062d41e017ef510433d66"}, - {file = "triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124"}, + {file = "triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2294514340cfe4e8f4f9e5c66c702744c4a117d25e618bd08469d0bfed1e2e5"}, + {file = "triton-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da58a152bddb62cafa9a857dd2bc1f886dbf9f9c90a2b5da82157cd2b34392b0"}, + {file = "triton-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af58716e721460a61886668b205963dc4d1e4ac20508cc3f623aef0d70283d5"}, + {file = "triton-2.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e8fe46d3ab94a8103e291bd44c741cc294b91d1d81c1a2888254cbf7ff846dab"}, + {file = "triton-2.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8ce26093e539d727e7cf6f6f0d932b1ab0574dc02567e684377630d86723ace"}, + {file = "triton-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:227cc6f357c5efcb357f3867ac2a8e7ecea2298cd4606a8ba1e931d1d5a947df"}, ] [package.dependencies] @@ -6642,6 +6718,35 @@ rich = ">=10.11.0" shellingham = ">=1.3.0" typing-extensions = ">=3.7.4.3" +[[package]] +name = "types-cffi" +version = "1.16.0.20240331" +description = "Typing stubs for cffi" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-cffi-1.16.0.20240331.tar.gz", hash = "sha256:b8b20d23a2b89cfed5f8c5bc53b0cb8677c3aac6d970dbc771e28b9c698f5dee"}, + {file = "types_cffi-1.16.0.20240331-py3-none-any.whl", hash = "sha256:a363e5ea54a4eb6a4a105d800685fde596bc318089b025b27dee09849fe41ff0"}, +] + +[package.dependencies] +types-setuptools = "*" + +[[package]] +name = "types-pyopenssl" +version = "24.1.0.20240425" +description = "Typing stubs for pyOpenSSL" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-pyOpenSSL-24.1.0.20240425.tar.gz", hash = "sha256:0a7e82626c1983dc8dc59292bf20654a51c3c3881bcbb9b337c1da6e32f0204e"}, + {file = "types_pyOpenSSL-24.1.0.20240425-py3-none-any.whl", hash = "sha256:f51a156835555dd2a1f025621e8c4fbe7493470331afeef96884d1d29bf3a473"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-cffi = "*" + [[package]] name = "types-pyyaml" version = "6.0.12.20240311" @@ -6653,6 +6758,32 @@ files = [ {file = "types_PyYAML-6.0.12.20240311-py3-none-any.whl", hash = "sha256:b845b06a1c7e54b8e5b4c683043de0d9caf205e7434b3edc678ff2411979b8f6"}, ] +[[package]] +name = "types-redis" +version = "4.6.0.20240425" +description = "Typing stubs for redis" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-redis-4.6.0.20240425.tar.gz", hash = "sha256:9402a10ee931d241fdfcc04592ebf7a661d7bb92a8dea631279f0d8acbcf3a22"}, + {file = "types_redis-4.6.0.20240425-py3-none-any.whl", hash = "sha256:ac5bc19e8f5997b9e76ad5d9cf15d0392d9f28cf5fc7746ea4a64b989c45c6a8"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-pyOpenSSL = "*" + +[[package]] +name = "types-setuptools" +version = "70.1.0.20240627" +description = "Typing stubs for setuptools" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-setuptools-70.1.0.20240627.tar.gz", hash = "sha256:385907a47b5cf302b928ce07953cd91147d5de6f3da604c31905fdf0ec309e83"}, + {file = "types_setuptools-70.1.0.20240627-py3-none-any.whl", hash = "sha256:c7bdf05cd0a8b66868b4774c7b3c079d01ae025d8c9562bfc8bf2ff44d263c9c"}, +] + [[package]] name = "typing-extensions" version = "4.12.0" @@ -7396,11 +7527,11 @@ doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linke test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] [extras] -all = ["azure-ai-inference", "azure-core", "azure-cosmos", "azure-identity", "azure-search-documents", "chromadb", "ipykernel", "milvus", "mistralai", "motor", "ollama", "pinecone-client", "psycopg", "pyarrow", "pymilvus", "qdrant-client", "redis", "sentence-transformers", "transformers", "usearch", "weaviate-client"] +all = ["azure-ai-inference", "azure-core", "azure-cosmos", 
"azure-identity", "azure-search-documents", "chromadb", "ipykernel", "milvus", "mistralai", "motor", "ollama", "pinecone-client", "psycopg", "pyarrow", "pymilvus", "qdrant-client", "redis", "sentence-transformers", "torch", "transformers", "usearch", "weaviate-client"] azure = ["azure-ai-inference", "azure-core", "azure-cosmos", "azure-identity", "azure-search-documents"] chromadb = ["chromadb"] google = ["google-cloud-aiplatform", "google-generativeai"] -hugging-face = ["sentence-transformers", "transformers"] +hugging-face = ["sentence-transformers", "torch", "transformers"] milvus = ["milvus", "pymilvus"] mistralai = ["mistralai"] mongo = ["motor"] @@ -7409,11 +7540,11 @@ ollama = ["ollama"] pinecone = ["pinecone-client"] postgres = ["psycopg"] qdrant = ["qdrant-client"] -redis = ["redis"] +redis = ["redis", "types-redis"] usearch = ["pyarrow", "usearch"] weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.13" -content-hash = "18240574b7c1cbe7dec2d367ab529d757c37fa1c11678d10ec63d858519db9c6" +content-hash = "85b42a18e2642466248624e14916498073a541ed31f43a26bab12bf75486ef24" diff --git a/python/pyproject.toml b/python/pyproject.toml index af276e491f11..2683b935e74d 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -57,8 +57,9 @@ chromadb = { version = ">=0.4.13,<0.6.0", optional = true} google-cloud-aiplatform = { version = "^1.60.0", optional = true} google-generativeai = { version = "^0.7.2", optional = true} # hugging face -transformers = { version = "^4.28.1", extras=["torch"], optional = true} +transformers = { version = "^4.28.1", extras=['torch'], optional = true} sentence-transformers = { version = "^2.2.2", optional = true} +torch = {version = "2.2.2", optional = true} # mongo motor = { version = "^3.3.2", optional = true } # notebooks @@ -73,20 +74,20 @@ ollama = { version = "^0.2.1", optional = true} # pinecone pinecone-client = { version = ">=3.0.0", optional = true} # postgres -psycopg = { version="^3.1.9", extras=["binary","pool"], optional = true} +psycopg = { version="^3.2.1", extras=["binary","pool"], optional = true} # qdrant qdrant-client = { version = '^1.9', optional = true} # redis -redis = { version = "^4.6.0", optional = true} +redis = { version = "^5.0.7", extras=['hiredis'], optional = true} +types-redis = { version="^4.6.0.20240425", optional = true } # usearch usearch = { version = "^2.9", optional = true} pyarrow = { version = ">=12.0.1,<18.0.0", optional = true} weaviate-client = { version = ">=3.18,<5.0", optional = true} -ruff = "0.5.2" +pandas = {version = "^2.2.2", optional = true} [tool.poetry.group.dev.dependencies] pre-commit = ">=3.7.1" -ruff = ">=0.5" ipykernel = "^6.29.4" nbconvert = "^7.16.4" pytest = "^8.2.1" @@ -96,6 +97,7 @@ pytest-asyncio = "^0.23.7" snoop = "^0.4.3" mypy = ">=1.10.0" types-PyYAML = "^6.0.12.20240311" +ruff = "^0.5.2" [tool.poetry.group.unit-tests] optional = true @@ -109,8 +111,14 @@ mistralai = "^0.4.1" ollama = "^0.2.1" google-cloud-aiplatform = "^1.60.0" google-generativeai = "^0.7.2" -transformers = { version = "^4.28.1", extras=["torch"]} -sentence-transformers = "^2.2.2" +transformers = { version = "^4.28.1", extras=['torch']} +sentence-transformers = { version = "^2.2.2"} +torch = {version = "2.2.2"} +# qdrant +qdrant-client = '^1.9' +# redis +redis = { version = "^5.0.7", extras=['hiredis']} +pandas = {version = "^2.2.2"} [tool.poetry.group.tests] optional = true @@ -129,8 +137,9 @@ chromadb = ">=0.4.13,<0.6.0" google-cloud-aiplatform = "^1.60.0" 
google-generativeai = "^0.7.2" # hugging face -transformers = { version = "^4.28.1", extras=["torch"]} -sentence-transformers = "^2.2.2" +transformers = { version = "^4.28.1", extras=['torch']} +sentence-transformers = { version = "^2.2.2"} +torch = {version = "2.2.2"} # milvus pymilvus = ">=2.3,<2.4.4" milvus = { version = ">=2.3,<2.3.8", markers = 'sys_platform != "win32"'} @@ -147,21 +156,23 @@ psycopg = { version="^3.1.9", extras=["binary","pool"]} # qdrant qdrant-client = '^1.9' # redis -redis = "^4.6.0" +redis = { version="^5.0.7", extras=['hiredis']} +types-redis = { version="^4.6.0.20240425" } # usearch usearch = "^2.9" pyarrow = ">=12.0.1,<18.0.0" # weaviate weaviate-client = ">=3.18,<5.0" +pandas = {version = "^2.2.2"} # Extras are exposed to pip, this allows a user to easily add the right dependencies to their environment [tool.poetry.extras] -all = ["transformers", "sentence-transformers", "qdrant-client", "chromadb", "pymilvus", "milvus", "mistralai", "ollama", "google", "weaviate-client", "pinecone-client", "psycopg", "redis", "azure-ai-inference", "azure-search-documents", "azure-core", "azure-identity", "azure-cosmos", "usearch", "pyarrow", "ipykernel", "motor"] +all = ["transformers", "sentence-transformers", "torch", "qdrant-client", "chromadb", "pymilvus", "milvus", "mistralai", "ollama", "google", "weaviate-client", "pinecone-client", "psycopg", "redis", "azure-ai-inference", "azure-search-documents", "azure-core", "azure-identity", "azure-cosmos", "usearch", "pyarrow", "ipykernel", "motor"] azure = ["azure-ai-inference", "azure-search-documents", "azure-core", "azure-identity", "azure-cosmos", "msgraph-sdk"] chromadb = ["chromadb"] google = ["google-cloud-aiplatform", "google-generativeai"] -hugging_face = ["transformers", "sentence-transformers"] +hugging_face = ["transformers", "sentence-transformers", "torch"] milvus = ["pymilvus", "milvus"] mistralai = ["mistralai"] ollama = ["ollama"] @@ -170,7 +181,7 @@ notebooks = ["ipykernel"] pinecone = ["pinecone-client"] postgres = ["psycopg"] qdrant = ["qdrant-client"] -redis = ["redis"] +redis = ["redis", "types-redis"] usearch = ["usearch", "pyarrow"] weaviate = ["weaviate-client"] diff --git a/python/samples/concepts/memory/data_models.py b/python/samples/concepts/memory/data_models.py new file mode 100644 index 000000000000..a120254497ec --- /dev/null +++ b/python/samples/concepts/memory/data_models.py @@ -0,0 +1,160 @@ +# Copyright (c) Microsoft. All rights reserved. + +from dataclasses import dataclass, field +from typing import Annotated, Any +from uuid import uuid4 + +from pandas import DataFrame +from pydantic import Field + +from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) +from semantic_kernel.kernel_pydantic import KernelBaseModel + +# This concept shows the different ways you can create a vector store data model +# using dataclasses, Pydantic, and Python classes. +# As well as using types like Pandas Dataframes. + +# There are a number of universal things about these data models: +# they must specify the type of field through the annotation (or the definition). +# there must be at least one field of type VectorStoreRecordKeyField. 
+# If you set the embedding_property_name in the VectorStoreRecordDataField, that field must exist and be a vector field. +# An unannotated field is allowed but must have a default value. + +# The purpose of these models is to be what you pass to and get back from a vector store. +# There may be limitations to the data types that the vector store can handle, +# so not every store will be able to handle exactly the same model. +# For instance, some stores only allow a string as the key field, while others allow str and int, +# so defining the key as an int might make some stores unusable. + +# The decorator takes the class and pulls out the fields and annotations to create a definition +# of type VectorStoreRecordDefinition. +# This definition is used by the vector store to know how to handle the data model. + +# You can also create the definition yourself and pass it to the vector stores together with a standard type, +# like a dict or list. +# Or you can use the definition in container mode with something like a Pandas Dataframe. + + +# Data model using built-in Python dataclasses +@vectorstoremodel +@dataclass +class DataModelDataclass: + vector: Annotated[list[float], VectorStoreRecordVectorField] + key: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4())) + content: Annotated[str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")] = ( + "content1" + ) + other: str | None = None + + +# Data model using Pydantic BaseModels +@vectorstoremodel +class DataModelPydantic(KernelBaseModel): + vector: Annotated[list[float], VectorStoreRecordVectorField] + key: Annotated[str, VectorStoreRecordKeyField()] = Field(default_factory=lambda: str(uuid4())) + content: Annotated[str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")] = ( + "content1" + ) + other: str | None = None + + +# Data model using Pydantic BaseModels with mixed annotations (from pydantic and SK) +@vectorstoremodel +class DataModelPydanticComplex(KernelBaseModel): + vector: Annotated[list[float], VectorStoreRecordVectorField] + key: Annotated[str, Field(default_factory=lambda: str(uuid4())), VectorStoreRecordKeyField()] + content: Annotated[str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")] = ( + "content1" + ) + other: str | None = None + + +# Data model using Python classes +# This one includes a custom serialize and deserialize method +@vectorstoremodel +class DataModelPython: + def __init__( + self, + vector: Annotated[list[float], VectorStoreRecordVectorField], + key: Annotated[str, VectorStoreRecordKeyField] = None, + content: Annotated[ + str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector") + ] = "content1", + other: str | None = None, + ): + self.vector = vector + self.other = other + self.key = key or str(uuid4()) + self.content = content + + def __str__(self) -> str: + return f"DataModelPython(vector={self.vector}, key={self.key}, content={self.content}, other={self.other})" + + def serialize(self) -> dict[str, Any]: + return { + "vector": self.vector, + "key": self.key, + "content": self.content, + } + + @classmethod + def deserialize(cls, obj: dict[str, Any]) -> "DataModelPython": + return cls( + vector=obj["vector"], + key=obj["key"], + content=obj["content"], + ) + + +# Data model definition for use with Pandas +# Note the container_mode flag, which makes sure that returned records are wrapped in a container +# even when requesting a batch of 
records. +# There are also to_dict and from_dict methods, which are used to convert the data model to and from a dict. +# These should be specific to the type used; if dict is used as the type, they can be left off. +data_model_definition_pandas = VectorStoreRecordDefinition( + fields={ + "vector": VectorStoreRecordVectorField(property_type="list[float]"), + "key": VectorStoreRecordKeyField(property_type="str"), + "content": VectorStoreRecordDataField( + property_type="str", has_embedding=True, embedding_property_name="vector" + ), + }, + container_mode=True, + to_dict=lambda record, **_: record.to_dict(orient="records"), + from_dict=lambda records, **_: DataFrame(records), +) + + +if __name__ == "__main__": + data_item1 = DataModelDataclass(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None) + data_item2 = DataModelPydantic(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None) + data_item3 = DataModelPydanticComplex(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None) + data_item4 = DataModelPython(content="Hello, world!", vector=[1.0, 2.0, 3.0], other=None) + print("Example records:") + print(f"DataClass:\n {data_item1}", end="\n\n") + print(f"Pydantic:\n {data_item2}", end="\n\n") + print(f"Pydantic with annotations:\n {data_item3}", end="\n\n") + print(f"Python:\n {data_item4}", end="\n\n") + + print("Item definitions:") + print(f"DataClass:\n {data_item1.__kernel_vectorstoremodel_definition__}", end="\n\n") + print(f"Pydantic:\n {data_item2.__kernel_vectorstoremodel_definition__}", end="\n\n") + print(f"Pydantic with annotations:\n {data_item3.__kernel_vectorstoremodel_definition__}", end="\n\n") + print(f"Python:\n {data_item4.__kernel_vectorstoremodel_definition__}", end="\n\n") + print(f"Definition for use with Pandas:\n {data_model_definition_pandas}", end="\n\n") + if ( + data_item1.__kernel_vectorstoremodel_definition__.fields + == data_item2.__kernel_vectorstoremodel_definition__.fields + == data_item3.__kernel_vectorstoremodel_definition__.fields + == data_item4.__kernel_vectorstoremodel_definition__.fields + == data_model_definition_pandas.fields + ): + print("All data models are the same") + else: + print("Data models are not the same") diff --git a/python/samples/concepts/memory/new_memory.py b/python/samples/concepts/memory/new_memory.py new file mode 100644 index 000000000000..73ccf501207f --- /dev/null +++ b/python/samples/concepts/memory/new_memory.py @@ -0,0 +1,133 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ + +from dataclasses import dataclass, field +from typing import Annotated +from uuid import uuid4 + +import numpy as np + +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( + OpenAIEmbeddingPromptExecutionSettings, +) +from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_collection import AzureAISearchCollection +from semantic_kernel.connectors.memory.qdrant.qdrant_collection import QdrantCollection +from semantic_kernel.connectors.memory.redis.redis_collection import RedisHashsetCollection, RedisJsonCollection +from semantic_kernel.connectors.memory.volatile.volatile_collection import VolatileCollection +from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) +from semantic_kernel.data.vector_store_record_utils import VectorStoreRecordUtils + + +@vectorstoremodel +@dataclass +class MyDataModelArray: + vector: Annotated[ + np.ndarray | None, + VectorStoreRecordVectorField( + embedding_settings={"embedding": OpenAIEmbeddingPromptExecutionSettings(dimensions=1536)}, + index_kind="hnsw", + dimensions=1536, + distance_function="cosine", + property_type="float", + serialize_function=np.ndarray.tolist, + deserialize_function=np.array, + ), + ] = None + other: str | None = None + id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4())) + content: Annotated[ + str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str") + ] = "content1" + + +@vectorstoremodel +@dataclass +class MyDataModelList: + vector: Annotated[ + list[float] | None, + VectorStoreRecordVectorField( + embedding_settings={"embedding": OpenAIEmbeddingPromptExecutionSettings(dimensions=1536)}, + index_kind="hnsw", + dimensions=1536, + distance_function="cosine", + property_type="float", + ), + ] = None + other: str | None = None + id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4())) + content: Annotated[ + str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str") + ] = "content1" + + +# configuration +# specify which store (ai_search, redis_json, redis_hashset, qdrant or volatile) to use +# and which model (vectors as list or as numpy arrays) +store = "volatile" +collection_name = "test" +MyDataModel = MyDataModelArray + +stores: dict[str, VectorStoreRecordCollection] = { + "ai_search": AzureAISearchCollection[MyDataModel]( + data_model_type=MyDataModel, + ), + "redis_json": RedisJsonCollection[MyDataModel]( + data_model_type=MyDataModel, + collection_name=collection_name, + prefix_collection_name_to_key_names=True, + ), + "redis_hashset": RedisHashsetCollection[MyDataModel]( + data_model_type=MyDataModel, + collection_name=collection_name, + prefix_collection_name_to_key_names=True, + ), + "qdrant": QdrantCollection[MyDataModel]( + data_model_type=MyDataModel, collection_name=collection_name, prefer_grpc=True, named_vectors=False + ), + "volatile": VolatileCollection[MyDataModel]( + data_model_type=MyDataModel, + collection_name=collection_name, + ), +} + + +async def 
main(): + kernel = Kernel() + service_id = "embedding" + ai_model_id = "text-embedding-3-small" + kernel.add_service(OpenAITextEmbedding(service_id=service_id, ai_model_id=ai_model_id)) + async with stores[store] as record_store: + await record_store.create_collection_if_not_exists() + + record1 = MyDataModel(content="My text", id="e6103c03-487f-4d7d-9c23-4723651c17f4") + record2 = MyDataModel(content="My other text", id="09caec77-f7e1-466a-bcec-f1d51c5b15be") + + records = await VectorStoreRecordUtils(kernel).add_vector_to_records( + [record1, record2], data_model_type=MyDataModel + ) + keys = await record_store.upsert_batch(records) + print(f"upserted {keys=}") + + results = await record_store.get_batch([record1.id, record2.id]) + if results: + for result in results: + print(f"found {result.id=}") + print(f"{result.content=}") + if result.vector is not None: + print(f"{result.vector[:5]=}") + else: + print("not found") + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main()) diff --git a/python/samples/concepts/memory/pandas_memory.py b/python/samples/concepts/memory/pandas_memory.py new file mode 100644 index 000000000000..79a7a5446b4e --- /dev/null +++ b/python/samples/concepts/memory/pandas_memory.py @@ -0,0 +1,75 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from uuid import uuid4 + +import pandas as pd + +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( + OpenAIEmbeddingPromptExecutionSettings, +) +from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_collection import AzureAISearchCollection +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) +from semantic_kernel.data.vector_store_record_utils import VectorStoreRecordUtils + +model_fields = VectorStoreRecordDefinition( + container_mode=True, + fields={ + "content": VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector"), + "id": VectorStoreRecordKeyField(), + "vector": VectorStoreRecordVectorField( + embedding_settings={"embedding": OpenAIEmbeddingPromptExecutionSettings(dimensions=1536)} + ), + }, + to_dict=lambda record, **_: record.to_dict(orient="records"), + from_dict=lambda records, **_: pd.DataFrame(records), +) + + +async def main(): + # setup the kernel + kernel = Kernel() + kernel.add_service(OpenAITextEmbedding(service_id="embedding", ai_model_id="text-embedding-3-small")) + + # create the record collection + record_collection = AzureAISearchCollection[pd.DataFrame]( + data_model_type=pd.DataFrame, + data_model_definition=model_fields, + ) + # create some records + records = [ + {"id": str(uuid4()), "content": "my dict text", "vector": None}, + {"id": str(uuid4()), "content": "my second text", "vector": None}, + ] + + # create the dataframe and add the embeddings + df = pd.DataFrame(records) + df = await VectorStoreRecordUtils(kernel).add_vector_to_records(df, None, data_model_definition=model_fields) + print("Records with embeddings:") + print(df.shape) + print(df.head(5)) + + # upsert the records (for a container, upsert and upsert_batch are equivalent) + await record_collection.upsert_batch(df) + + # retrieve a record + result = await 
record_collection.get(records[0]["id"]) + print("Retrieved records:") + print(result.shape) + print(result.head(5)) + + # explicit cleanup, usually not needed, but a script like this + # closes so fast that the async close triggered by delete may not finish on time + del record_collection + await asyncio.sleep(1) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/semantic_kernel/connectors/ai/function_call_choice_configuration.py b/python/semantic_kernel/connectors/ai/function_call_choice_configuration.py new file mode 100644 index 000000000000..d44fb946af65 --- /dev/null +++ b/python/semantic_kernel/connectors/ai/function_call_choice_configuration.py @@ -0,0 +1,15 @@ +# Copyright (c) Microsoft. All rights reserved. + + +from pydantic.dataclasses import dataclass + +from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata +from semantic_kernel.utils.experimental_decorator import experimental_class + + +@experimental_class +@dataclass +class FunctionCallChoiceConfiguration: + """Configuration for function call choice.""" + + available_functions: list[KernelFunctionMetadata] | None = None diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index 354fd0397a56..70240b45710f 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -1,15 +1,19 @@ # Copyright (c) Microsoft. All rights reserved. -from typing import Any +from collections import OrderedDict +from typing import TYPE_CHECKING, Any -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionCallChoiceConfiguration -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata +from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError + +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_choice_behavior import FunctionCallChoiceConfiguration + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata def update_settings_from_function_call_configuration( - function_choice_configuration: FunctionCallChoiceConfiguration, - settings: PromptExecutionSettings, + function_choice_configuration: "FunctionCallChoiceConfiguration", + settings: "PromptExecutionSettings", type: str, ) -> None: """Update the settings from a FunctionChoiceConfiguration.""" @@ -26,7 +30,7 @@ def update_settings_from_function_call_configuration( def kernel_function_metadata_to_function_call_format( - metadata: KernelFunctionMetadata, + metadata: "KernelFunctionMetadata", ) -> dict[str, Any]: """Convert the kernel function metadata to function calling format.""" return { @@ -41,3 +45,26 @@ def kernel_function_metadata_to_function_call_format( }, }, } + + +def _combine_filter_dicts(*dicts: dict[str, list[str]]) -> dict: + """Combine multiple filter dictionaries with list values into one dictionary. + + This method is ensuring unique values while preserving order. 
+ """ + combined_filters = {} + + keys = set().union(*(d.keys() for d in dicts)) + + for key in keys: + combined_functions: OrderedDict[str, None] = OrderedDict() + for d in dicts: + if key in d: + if isinstance(d[key], list): + for item in d[key]: + combined_functions[item] = None + else: + raise ServiceInitializationError(f"Values for filter key '{key}' are not lists.") + combined_filters[key] = list(combined_functions.keys()) + + return combined_filters diff --git a/python/semantic_kernel/connectors/ai/function_choice_behavior.py b/python/semantic_kernel/connectors/ai/function_choice_behavior.py index 13a918ff315b..759274d632f2 100644 --- a/python/semantic_kernel/connectors/ai/function_choice_behavior.py +++ b/python/semantic_kernel/connectors/ai/function_choice_behavior.py @@ -1,29 +1,32 @@ # Copyright (c) Microsoft. All rights reserved. import logging -from collections import OrderedDict from collections.abc import Callable from enum import Enum -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Literal, TypeVar -from pydantic.dataclasses import dataclass from typing_extensions import deprecated +from semantic_kernel.connectors.ai.function_calling_utils import _combine_filter_dicts from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError -from semantic_kernel.functions.kernel_function_metadata import KernelFunctionMetadata from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.utils.experimental_decorator import experimental_class if TYPE_CHECKING: from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.kernel import Kernel + DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS = 5 logger = logging.getLogger(__name__) +_T = TypeVar("_T", bound="FunctionChoiceBehavior") + + @experimental_class class FunctionChoiceType(Enum): """The type of function choice behavior.""" @@ -33,37 +36,6 @@ class FunctionChoiceType(Enum): REQUIRED = "required" -@experimental_class -@dataclass -class FunctionCallChoiceConfiguration: - """Configuration for function call choice.""" - - available_functions: list["KernelFunctionMetadata"] | None = None - - -def _combine_filter_dicts(*dicts: dict[str, list[str]]) -> dict: - """Combine multiple filter dictionaries with list values into one dictionary. - - This method is ensuring unique values while preserving order. - """ - combined_filters = {} - - keys = set().union(*(d.keys() for d in dicts)) - - for key in keys: - combined_functions: OrderedDict[str, None] = OrderedDict() - for d in dicts: - if key in d: - if isinstance(d[key], list): - for item in d[key]: - combined_functions[item] = None - else: - raise ServiceInitializationError(f"Values for filter key '{key}' are not lists.") - combined_filters[key] = list(combined_functions.keys()) - - return combined_filters - - @experimental_class class FunctionChoiceBehavior(KernelBaseModel): """Class that controls function choice behavior. @@ -73,7 +45,7 @@ class FunctionChoiceBehavior(KernelBaseModel): max_auto_invoke_attempts: The maximum number of auto invoke attempts. filters: Filters for the function choice behavior. Available options are: excluded_plugins, included_plugins, excluded_functions, or included_functions. - type: The type of function choice behavior. 
+ type_: The type of function choice behavior. Properties: auto_invoke_kernel_functions: Check if the kernel functions should be auto-invoked. @@ -101,11 +73,11 @@ class FunctionChoiceBehavior(KernelBaseModel): dict[Literal["excluded_plugins", "included_plugins", "excluded_functions", "included_functions"], list[str]] | None ) = None - type: FunctionChoiceType | None = None + type_: FunctionChoiceType | None = None @classmethod @deprecated("The `FunctionCallBehavior` class is deprecated; use `FunctionChoiceBehavior` instead.") - def from_function_call_behavior(cls, behavior: "FunctionCallBehavior") -> "FunctionChoiceBehavior": + def from_function_call_behavior(cls: type[_T], behavior: "FunctionCallBehavior") -> _T: """Create a FunctionChoiceBehavior from a FunctionCallBehavior.""" from semantic_kernel.connectors.ai.function_call_behavior import ( EnabledFunctions, @@ -145,8 +117,10 @@ def _check_and_get_config( Literal["excluded_plugins", "included_plugins", "excluded_functions", "included_functions"], list[str] ] | None = {}, - ) -> FunctionCallChoiceConfiguration: + ) -> "FunctionCallChoiceConfiguration": """Check for missing functions and get the function call choice configuration.""" + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + if filters: return FunctionCallChoiceConfiguration(available_functions=kernel.get_list_of_function_metadata(filters)) return FunctionCallChoiceConfiguration(available_functions=kernel.get_full_list_of_function_metadata()) @@ -164,15 +138,15 @@ def configure( config = self.get_config(kernel) if config: - update_settings_callback(config, settings, self.type) + update_settings_callback(config, settings, self.type_) - def get_config(self, kernel: "Kernel") -> FunctionCallChoiceConfiguration: + def get_config(self, kernel: "Kernel") -> "FunctionCallChoiceConfiguration": """Get the function call choice configuration based on the type.""" return self._check_and_get_config(kernel, self.filters) @classmethod def Auto( - cls, + cls: type[_T], auto_invoke: bool = True, *, filters: dict[ @@ -180,7 +154,7 @@ def Auto( ] | None = None, **kwargs, - ) -> "FunctionChoiceBehavior": + ) -> _T: """Creates a FunctionChoiceBehavior with type AUTO. Returns FunctionChoiceBehavior class with auto_invoke enabled, and the desired functions @@ -189,21 +163,21 @@ def Auto( """ kwargs.setdefault("maximum_auto_invoke_attempts", DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS if auto_invoke else 0) return cls( - type=FunctionChoiceType.AUTO, + type_=FunctionChoiceType.AUTO, filters=filters, **kwargs, ) @classmethod def NoneInvoke( - cls, + cls: type[_T], *, filters: dict[ Literal["excluded_plugins", "included_plugins", "excluded_functions", "included_functions"], list[str] ] | None = None, **kwargs, - ) -> "FunctionChoiceBehavior": + ) -> _T: """Creates a FunctionChoiceBehavior with type NONE. Returns FunctionChoiceBehavior class with auto_invoke disabled, and the desired functions @@ -212,14 +186,14 @@ def NoneInvoke( """ kwargs.setdefault("maximum_auto_invoke_attempts", 0) return cls( - type=FunctionChoiceType.NONE, + type_=FunctionChoiceType.NONE, filters=filters, **kwargs, ) @classmethod def Required( - cls, + cls: type[_T], auto_invoke: bool = True, *, filters: dict[ @@ -227,7 +201,7 @@ def Required( ] | None = None, **kwargs, - ) -> "FunctionChoiceBehavior": + ) -> _T: """Creates a FunctionChoiceBehavior with type REQUIRED. 
Returns FunctionChoiceBehavior class with auto_invoke enabled, and the desired functions @@ -236,13 +210,13 @@ def Required( """ kwargs.setdefault("maximum_auto_invoke_attempts", 1 if auto_invoke else 0) return cls( - type=FunctionChoiceType.REQUIRED, + type_=FunctionChoiceType.REQUIRED, filters=filters, **kwargs, ) @classmethod - def from_dict(cls, data: dict) -> "FunctionChoiceBehavior": + def from_dict(cls: type[_T], data: dict) -> _T: """Create a FunctionChoiceBehavior from a dictionary.""" type_map = { "auto": cls.Auto, @@ -268,7 +242,7 @@ def from_dict(cls, data: dict) -> "FunctionChoiceBehavior": ) @classmethod - def from_string(cls, data: str) -> "FunctionChoiceBehavior": + def from_string(cls: type[_T], data: str) -> _T: """Create a FunctionChoiceBehavior from a string. This method converts the provided string to a FunctionChoiceBehavior object @@ -276,11 +250,11 @@ def from_string(cls, data: str) -> "FunctionChoiceBehavior": """ type_value = data.lower() if type_value == "auto": - return FunctionChoiceBehavior.Auto() + return cls.Auto() if type_value == "none": - return FunctionChoiceBehavior.NoneInvoke() + return cls.NoneInvoke() if type_value == "required": - return FunctionChoiceBehavior.Required() + return cls.Required() raise ServiceInitializationError( f"The specified type `{type_value}` is not supported. Allowed types are: `auto`, `none`, `required`." ) diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py index 5199384ba967..abbd5bf1281d 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/utils.py @@ -6,7 +6,8 @@ from google.generativeai.protos import Blob, Candidate, FunctionCall, FunctionResponse, Part -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionCallChoiceConfiguration, FunctionChoiceType +from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.google_ai.google_ai_prompt_execution_settings import ( GoogleAIChatPromptExecutionSettings, ) diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py index 9b78f67e74a0..400329688331 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py @@ -8,7 +8,8 @@ from google.cloud.aiplatform_v1beta1.types.tool import FunctionCall, FunctionResponse from vertexai.generative_models import FunctionDeclaration, Tool, ToolConfig -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionCallChoiceConfiguration, FunctionChoiceType +from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceType from semantic_kernel.connectors.ai.google.shared_utils import ( FUNCTION_CHOICE_TYPE_TO_GOOGLE_FUNCTION_CALLING_MODE, format_function_result_content_name_to_gemini_function_name, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py index 61df57d7fa4f..c65e0bc01989 100644 --- 
a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py @@ -4,7 +4,6 @@ from abc import ABC from typing import Any -from numpy import array from openai import AsyncOpenAI, AsyncStream, BadRequestError from openai.types import Completion, CreateEmbeddingResponse from openai.types.chat import ChatCompletion, ChatCompletionChunk @@ -62,9 +61,7 @@ async def _send_embedding_request(self, settings: OpenAIEmbeddingPromptExecution try: response = await self.client.embeddings.create(**settings.prepare_settings_dict()) self.store_usage(response) - # make numpy arrays from the response - # TODO (eavanvalkenburg): the openai response is cast to a list[float], could be used instead of ndarray - return [array(x.embedding) for x in response.data] + return [x.embedding for x in response.data] except Exception as ex: raise ServiceResponseException( f"{type(self)} service failed to generate embeddings", diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py index e227879add18..3ba02c445c29 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_embedding_base.py @@ -71,6 +71,6 @@ async def generate_raw_embeddings( raw_embeddings.extend(raw_embedding) return raw_embeddings - @override def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]: + """Get the request settings class.""" return OpenAIEmbeddingPromptExecutionSettings diff --git a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py index c530c09342a6..b683a58e17fa 100644 --- a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. import logging -from typing import Any +from typing import Any, TypeVar from pydantic import Field, model_validator @@ -10,6 +10,8 @@ logger = logging.getLogger(__name__) +_T = TypeVar("_T", bound="PromptExecutionSettings") + class PromptExecutionSettings(KernelBaseModel): """Base class for prompt execution settings. 
@@ -36,7 +38,7 @@ class PromptExecutionSettings(KernelBaseModel): @model_validator(mode="before") @classmethod - def parse_function_choice_behavior(cls, data: dict[str, Any]) -> dict[str, Any]: + def parse_function_choice_behavior(cls: type[_T], data: dict[str, Any]) -> dict[str, Any]: """Parse the function choice behavior data.""" function_choice_behavior_data = data.get("function_choice_behavior") if function_choice_behavior_data: @@ -82,7 +84,7 @@ def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: by_alias=True, ) - def update_from_prompt_execution_settings(self, config: "PromptExecutionSettings") -> None: + def update_from_prompt_execution_settings(self, config: _T) -> None: """Update the prompt execution settings from a completion config.""" if config.service_id is not None: self.service_id = config.service_id @@ -91,7 +93,7 @@ def update_from_prompt_execution_settings(self, config: "PromptExecutionSettings self.unpack_extension_data() @classmethod - def from_prompt_execution_settings(cls, config: "PromptExecutionSettings") -> "PromptExecutionSettings": + def from_prompt_execution_settings(cls: type[_T], config: _T) -> _T: """Create a prompt execution settings from a completion config.""" config.pack_extension_data() return cls( diff --git a/python/semantic_kernel/connectors/memory/azure_ai_search/__init__.py b/python/semantic_kernel/connectors/memory/azure_ai_search/__init__.py new file mode 100644 index 000000000000..18c97087a2b8 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_ai_search/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft. All rights reserved. + +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_collection import AzureAISearchCollection +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_store import AzureAISearchStore +from semantic_kernel.connectors.memory.azure_cognitive_search.azure_ai_search_settings import AzureAISearchSettings + +__all__ = ["AzureAISearchCollection", "AzureAISearchSettings", "AzureAISearchStore"] diff --git a/python/semantic_kernel/connectors/memory/azure_ai_search/azure_ai_search_collection.py b/python/semantic_kernel/connectors/memory/azure_ai_search/azure_ai_search_collection.py new file mode 100644 index 000000000000..610bd496794e --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_ai_search/azure_ai_search_collection.py @@ -0,0 +1,202 @@ +# Copyright (c) Microsoft. All rights reserved. 
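The new azure_ai_search package exposes its public surface through the __init__.py above; a short usage sketch (not taken from the patch), assuming the AZURE_AI_SEARCH_* environment variables described further down are configured:

from semantic_kernel.connectors.memory.azure_ai_search import (
    AzureAISearchCollection,  # per-index record operations (defined below)
    AzureAISearchSettings,    # env-var backed settings (AZURE_AI_SEARCH_*)
    AzureAISearchStore,       # index management and collection factory (defined below)
)

store = AzureAISearchStore()  # endpoint / api key / index name fall back to AZURE_AI_SEARCH_* variables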
+ +import asyncio +import logging +import sys +from collections.abc import Sequence +from typing import Any, ClassVar, Generic, TypeVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +from azure.search.documents.aio import SearchClient +from azure.search.documents.indexes.aio import SearchIndexClient +from azure.search.documents.indexes.models import SearchIndex +from pydantic import ValidationError + +from semantic_kernel.connectors.memory.azure_ai_search.utils import ( + data_model_definition_to_azure_ai_search_index, + get_search_client, + get_search_index_client, +) +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.exceptions import MemoryConnectorException, MemoryConnectorInitializationError +from semantic_kernel.utils.experimental_decorator import experimental_class + +logger: logging.Logger = logging.getLogger(__name__) + +TModel = TypeVar("TModel") + + +@experimental_class +class AzureAISearchCollection(VectorStoreRecordCollection[str, TModel], Generic[TModel]): + """Azure AI Search collection implementation.""" + + search_client: SearchClient + search_index_client: SearchIndexClient + supported_key_types: ClassVar[list[str] | None] = ["str"] + supported_vector_types: ClassVar[list[str] | None] = ["float", "int"] + + def __init__( + self, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + collection_name: str | None = None, + search_index_client: SearchIndexClient | None = None, + search_client: SearchClient | None = None, + **kwargs: Any, + ) -> None: + """Initializes a new instance of the AzureAISearchCollection class. + + Args: + data_model_type (type[TModel]): The type of the data model. + data_model_definition (VectorStoreRecordDefinition): The model definition, optional. + collection_name (str): The name of the collection, optional. + search_index_client (SearchIndexClient): The search index client for interacting with Azure AI Search, + used for creating and deleting indexes. + search_client (SearchClient): The search client for interacting with Azure AI Search, + used for record operations. + **kwargs: Additional keyword arguments, including: + The same keyword arguments used for AzureAISearchVectorStore: + search_endpoint: str | None = None, + api_key: str | None = None, + azure_credentials: AzureKeyCredential | None = None, + token_credentials: AsyncTokenCredential | TokenCredential | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None + + """ + if search_client and search_index_client: + if not collection_name: + collection_name = search_client._index_name + elif search_client._index_name != collection_name: + raise MemoryConnectorInitializationError( + "Search client and search index client have different index names." 
+ ) + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + search_client=search_client, + search_index_client=search_index_client, + ) + return + + if search_index_client: + if not collection_name: + raise MemoryConnectorInitializationError("Collection name is required.") + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + search_client=get_search_client( + search_index_client=search_index_client, collection_name=collection_name + ), + search_index_client=search_index_client, + ) + return + + from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_settings import ( + AzureAISearchSettings, + ) + + try: + azure_ai_search_settings = AzureAISearchSettings.create( + env_file_path=kwargs.get("env_file_path", None), + endpoint=kwargs.get("search_endpoint", None), + api_key=kwargs.get("api_key", None), + env_file_encoding=kwargs.get("env_file_encoding", None), + index_name=collection_name, + ) + except ValidationError as exc: + raise MemoryConnectorInitializationError("Failed to create Azure Cognitive Search settings.") from exc + search_index_client = get_search_index_client( + azure_ai_search_settings=azure_ai_search_settings, + azure_credential=kwargs.get("azure_credentials", None), + token_credential=kwargs.get("token_credentials", None), + ) + if not azure_ai_search_settings.index_name: + raise MemoryConnectorInitializationError("Collection name is required.") + + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=azure_ai_search_settings.index_name, + search_client=get_search_client( + search_index_client=search_index_client, collection_name=azure_ai_search_settings.index_name + ), + search_index_client=search_index_client, + ) + + @override + async def _inner_upsert( + self, + records: Sequence[Any], + **kwargs: Any, + ) -> Sequence[str]: + if not isinstance(records, list): + records = list(records) + results = await self.search_client.merge_or_upload_documents(documents=records, **kwargs) + return [result.key for result in results] # type: ignore + + @override + async def _inner_get(self, keys: Sequence[str], **kwargs: Any) -> Sequence[dict[str, Any]]: + client = self.search_client + result = await asyncio.gather( + *[client.get_document(key=key, selected_fields=kwargs.get("selected_fields", ["*"])) for key in keys], + return_exceptions=True, + ) + return [res for res in result if not isinstance(res, BaseException)] + + @override + async def _inner_delete(self, keys: Sequence[str], **kwargs: Any) -> None: + await self.search_client.delete_documents(documents=[{self._key_field_name: key} for key in keys]) + + @override + def _serialize_dicts_to_store_models(self, records: Sequence[dict[str, Any]], **kwargs: Any) -> Sequence[Any]: + return records + + @override + def _deserialize_store_models_to_dicts(self, records: Sequence[Any], **kwargs: Any) -> Sequence[dict[str, Any]]: + return records + + @override + async def create_collection(self, **kwargs) -> None: + """Create a new collection in Azure AI Search. + + Args: + **kwargs: Additional keyword arguments. + index (SearchIndex): The search index to create, if this is supplied + this is used instead of a index created based on the definition. + encryption_key (SearchResourceEncryptionKey): The encryption key to use, + not used when index is supplied. 
+ other kwargs are passed to the create_index method. + """ + if index := kwargs.pop("index", None): + if isinstance(index, SearchIndex): + await self.search_index_client.create_index(index=index, **kwargs) + return + raise MemoryConnectorException("Invalid index type supplied.") + await self.search_index_client.create_index( + index=data_model_definition_to_azure_ai_search_index( + collection_name=self.collection_name, + definition=self.data_model_definition, + encryption_key=kwargs.pop("encryption_key", None), + ), + **kwargs, + ) + + @override + async def does_collection_exist(self, **kwargs) -> bool: + if "params" not in kwargs: + kwargs["params"] = {"select": ["name"]} + return self.collection_name in [ + index_name async for index_name in self.search_index_client.list_index_names(**kwargs) + ] + + @override + async def delete_collection(self, **kwargs) -> None: + await self.search_index_client.delete_index(self.collection_name, **kwargs) diff --git a/python/semantic_kernel/connectors/memory/azure_ai_search/azure_ai_search_settings.py b/python/semantic_kernel/connectors/memory/azure_ai_search/azure_ai_search_settings.py new file mode 100644 index 000000000000..99fc5620d289 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_ai_search/azure_ai_search_settings.py @@ -0,0 +1,25 @@ +# Copyright (c) Microsoft. All rights reserved. + +from typing import ClassVar + +from pydantic import SecretStr + +from semantic_kernel.kernel_pydantic import HttpsUrl, KernelBaseSettings +from semantic_kernel.utils.experimental_decorator import experimental_class + + +@experimental_class +class AzureAISearchSettings(KernelBaseSettings): + """Azure AI Search model settings currently used by the AzureCognitiveSearchMemoryStore connector. + + Args: + - api_key: SecretStr - Azure AI Search API key (Env var AZURE_AI_SEARCH_API_KEY) + - endpoint: HttpsUrl - Azure AI Search endpoint (Env var AZURE_AI_SEARCH_ENDPOINT) + - index_name: str - Azure AI Search index name (Env var AZURE_AI_SEARCH_INDEX_NAME) + """ + + env_prefix: ClassVar[str] = "AZURE_AI_SEARCH_" + + api_key: SecretStr | None = None + endpoint: HttpsUrl + index_name: str | None = None diff --git a/python/semantic_kernel/connectors/memory/azure_ai_search/azure_ai_search_store.py b/python/semantic_kernel/connectors/memory/azure_ai_search/azure_ai_search_store.py new file mode 100644 index 000000000000..fdfb00e06f4a --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_ai_search/azure_ai_search_store.py @@ -0,0 +1,123 @@ +# Copyright (c) Microsoft. All rights reserved. 
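A sketch (not taken from the patch) of the collection class defined above; MyDataModel stands in for any @vectorstoremodel-decorated class like the ones in the samples, and the index name is hypothetical. When no SearchIndex is supplied, create_collection derives the index from the data model definition:

from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection


async def ensure_index() -> None:
    # MyDataModel: assumed @vectorstoremodel-decorated class (see the samples earlier in this PR)
    collection = AzureAISearchCollection[MyDataModel](
        data_model_type=MyDataModel,
        collection_name="hotels-sample",  # hypothetical index name
    )
    async with collection:
        await collection.create_collection_if_not_exists()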
+ +import logging +import sys +from typing import TYPE_CHECKING, Any, TypeVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +from azure.search.documents.aio import SearchClient +from azure.search.documents.indexes.aio import SearchIndexClient +from pydantic import ValidationError + +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_collection import ( + AzureAISearchCollection, +) +from semantic_kernel.connectors.memory.azure_ai_search.utils import get_search_client, get_search_index_client +from semantic_kernel.data.vector_store import VectorStore +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.exceptions import MemoryConnectorInitializationError +from semantic_kernel.utils.experimental_decorator import experimental_class + +if TYPE_CHECKING: + from azure.core.credentials import AzureKeyCredential, TokenCredential + from azure.core.credentials_async import AsyncTokenCredential + + from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection + + +logger: logging.Logger = logging.getLogger(__name__) + +TModel = TypeVar("TModel") + + +@experimental_class +class AzureAISearchStore(VectorStore): + """Azure AI Search store implementation.""" + + search_index_client: SearchIndexClient + + def __init__( + self, + search_endpoint: str | None = None, + api_key: str | None = None, + azure_credentials: "AzureKeyCredential | None" = None, + token_credentials: "AsyncTokenCredential | TokenCredential | None" = None, + search_index_client: SearchIndexClient | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + ) -> None: + """Initializes a new instance of the AzureAISearchStore client. + + Args: + search_endpoint (str): The endpoint of the Azure AI Search service, optional. + Can be read from environment variables. + api_key (str): Azure AI Search API key, optional. Can be read from environment variables. + azure_credentials (AzureKeyCredential ): Azure AI Search credentials, optional. + token_credentials (AsyncTokenCredential | TokenCredential): Azure AI Search token credentials, optional. + search_index_client (SearchIndexClient): The search index client, optional. + env_file_path (str): Use the environment settings file as a fallback + to environment variables. + env_file_encoding (str): The encoding of the environment settings file. 
+ + """ + from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_settings import ( + AzureAISearchSettings, + ) + + if not search_index_client: + try: + azure_ai_search_settings = AzureAISearchSettings.create( + env_file_path=env_file_path, + endpoint=search_endpoint, + api_key=api_key, + env_file_encoding=env_file_encoding, + ) + except ValidationError as exc: + raise MemoryConnectorInitializationError("Failed to create Azure AI Search settings.") from exc + search_index_client = get_search_index_client( + azure_ai_search_settings=azure_ai_search_settings, + azure_credential=azure_credentials, + token_credential=token_credentials, + ) + + super().__init__(search_index_client=search_index_client) + + @override + def get_collection( + self, + collection_name: str, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + search_client: SearchClient | None = None, + **kwargs: Any, + ) -> "VectorStoreRecordCollection": + """Get a AzureAISearchCollection tied to a collection. + + Args: + collection_name (str): The name of the collection. + data_model_type (type[TModel]): The type of the data model. + data_model_definition (VectorStoreRecordDefinition | None): The model fields, optional. + search_client (SearchClient | None): The search client for interacting with Azure AI Search, + will be created if not supplied. + **kwargs: Additional keyword arguments, passed to the collection constructor. + """ + if collection_name not in self.vector_record_collections: + self.vector_record_collections[collection_name] = AzureAISearchCollection( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + search_index_client=self.search_index_client, + search_client=search_client or get_search_client(self.search_index_client, collection_name), + collection_name=collection_name, + **kwargs, + ) + return self.vector_record_collections[collection_name] + + @override + async def list_collection_names(self, **kwargs: Any) -> list[str]: + if "params" not in kwargs: + kwargs["params"] = {"select": ["name"]} + return [index async for index in self.search_index_client.list_index_names(**kwargs)] diff --git a/python/semantic_kernel/connectors/memory/azure_ai_search/const.py b/python/semantic_kernel/connectors/memory/azure_ai_search/const.py new file mode 100644 index 000000000000..0cb33e3d7497 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_ai_search/const.py @@ -0,0 +1,51 @@ +# Copyright (c) Microsoft. All rights reserved. 
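A sketch (not taken from the patch) of the store defined above: it holds a single SearchIndexClient, hands out cached collections per name, and can list the indexes in the service; MyDataModel is again an assumed @vectorstoremodel class:

from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchStore


async def show_store() -> None:
    store = AzureAISearchStore()  # credentials resolved from AZURE_AI_SEARCH_* environment variables
    collection = store.get_collection("hotels-sample", data_model_type=MyDataModel)  # hypothetical index
    print(await store.list_collection_names())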
+ +from azure.search.documents.indexes.models import ( + ExhaustiveKnnAlgorithmConfiguration, + ExhaustiveKnnParameters, + HnswAlgorithmConfiguration, + HnswParameters, + SearchFieldDataType, + VectorSearchAlgorithmMetric, +) + +from semantic_kernel.data.const import DistanceFunction, IndexKind + +INDEX_ALGORITHM_MAP = { + IndexKind.HNSW: (HnswAlgorithmConfiguration, HnswParameters), + IndexKind.FLAT: (ExhaustiveKnnAlgorithmConfiguration, ExhaustiveKnnParameters), + "default": (HnswAlgorithmConfiguration, HnswParameters), +} + +DISTANCE_FUNCTION_MAP = { + DistanceFunction.COSINE: VectorSearchAlgorithmMetric.COSINE, + DistanceFunction.DOT_PROD: VectorSearchAlgorithmMetric.DOT_PRODUCT, + DistanceFunction.EUCLIDEAN: VectorSearchAlgorithmMetric.EUCLIDEAN, + "default": VectorSearchAlgorithmMetric.COSINE, +} + +TYPE_MAPPER_DATA = { + "str": SearchFieldDataType.String, + "int": SearchFieldDataType.Int64, + "float": SearchFieldDataType.Double, + "bool": SearchFieldDataType.Boolean, + "list[str]": SearchFieldDataType.Collection(SearchFieldDataType.String), + "list[int]": SearchFieldDataType.Collection(SearchFieldDataType.Int64), + "list[float]": SearchFieldDataType.Collection(SearchFieldDataType.Double), + "list[bool]": SearchFieldDataType.Collection(SearchFieldDataType.Boolean), + "default": SearchFieldDataType.String, +} + +TYPE_MAPPER_VECTOR = { + "float": SearchFieldDataType.Collection(SearchFieldDataType.Single), + "int": "Collection(Edm.Int16)", + "binary": "Collection(Edm.Byte)", + "default": SearchFieldDataType.Collection(SearchFieldDataType.Single), +} + +__all__ = [ + "DISTANCE_FUNCTION_MAP", + "INDEX_ALGORITHM_MAP", + "TYPE_MAPPER_DATA", + "TYPE_MAPPER_VECTOR", +] diff --git a/python/semantic_kernel/connectors/memory/azure_ai_search/utils.py b/python/semantic_kernel/connectors/memory/azure_ai_search/utils.py new file mode 100644 index 000000000000..a3f3a549d532 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_ai_search/utils.py @@ -0,0 +1,178 @@ +# Copyright (c) Microsoft. All rights reserved. 
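The maps above are read with a "default" fallback whenever a field leaves its index kind, distance function or property type unset; a tiny sketch (not taken from the patch) of that lookup pattern as the utils module below applies it:

from semantic_kernel.connectors.memory.azure_ai_search.const import (
    DISTANCE_FUNCTION_MAP,
    INDEX_ALGORITHM_MAP,
    TYPE_MAPPER_VECTOR,
)

distance_function = None  # hypothetical vector field without an explicit distance function
metric = DISTANCE_FUNCTION_MAP[distance_function or "default"]  # VectorSearchAlgorithmMetric.COSINE
algo_class, algo_params = INDEX_ALGORITHM_MAP["default"]        # (HnswAlgorithmConfiguration, HnswParameters)
vector_type = TYPE_MAPPER_VECTOR["default"]                     # "Collection(Edm.Single)"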
+ +import asyncio +import contextlib +import logging +from typing import TYPE_CHECKING, Any + +from azure.core.credentials import AzureKeyCredential, TokenCredential +from azure.search.documents.aio import SearchClient +from azure.search.documents.indexes.aio import SearchIndexClient +from azure.search.documents.indexes.models import ( + SearchField, + SearchIndex, + SearchResourceEncryptionKey, + SimpleField, + VectorSearch, + VectorSearchProfile, +) + +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_settings import AzureAISearchSettings +from semantic_kernel.connectors.memory.azure_ai_search.const import ( + DISTANCE_FUNCTION_MAP, + INDEX_ALGORITHM_MAP, + TYPE_MAPPER_DATA, + TYPE_MAPPER_VECTOR, +) +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) +from semantic_kernel.exceptions import ServiceInitializationError +from semantic_kernel.utils.experimental_decorator import experimental_function +from semantic_kernel.utils.telemetry.user_agent import APP_INFO, prepend_semantic_kernel_to_user_agent + +if TYPE_CHECKING: + from azure.core.credentials_async import AsyncTokenCredential + +logger: logging.Logger = logging.getLogger(__name__) + + +def get_search_client(search_index_client: SearchIndexClient, collection_name: str, **kwargs: Any) -> SearchClient: + """Create a search client for a collection.""" + return SearchClientWrapper( + search_index_client._endpoint, collection_name, search_index_client._credential, **kwargs + ) + + +def get_search_index_client( + azure_ai_search_settings: AzureAISearchSettings, + azure_credential: AzureKeyCredential | None = None, + token_credential: "AsyncTokenCredential | TokenCredential | None" = None, +) -> SearchIndexClient: + """Return a client for Azure Cognitive Search. + + Args: + azure_ai_search_settings (AzureAISearchSettings): Azure Cognitive Search settings. + azure_credential (AzureKeyCredential): Optional Azure credentials (default: {None}). + token_credential (TokenCredential): Optional Token credential (default: {None}). 
+ """ + # Credentials + credential: "AzureKeyCredential | AsyncTokenCredential | TokenCredential | None" = None + if azure_ai_search_settings.api_key: + credential = AzureKeyCredential(azure_ai_search_settings.api_key.get_secret_value()) + elif azure_credential: + credential = azure_credential + elif token_credential: + credential = token_credential + else: + raise ServiceInitializationError("Error: missing Azure AI Search client credentials.") + + return SearchIndexClientWrapper( + endpoint=str(azure_ai_search_settings.endpoint), + credential=credential, # type: ignore + headers=prepend_semantic_kernel_to_user_agent({}) if APP_INFO else None, + ) + + +@experimental_function +def data_model_definition_to_azure_ai_search_index( + collection_name: str, + definition: VectorStoreRecordDefinition, + encryption_key: SearchResourceEncryptionKey | None = None, +) -> SearchIndex: + """Convert a VectorStoreRecordDefinition to an Azure AI Search index.""" + fields = [] + search_profiles = [] + search_algos = [] + + for field in definition.fields.values(): + if isinstance(field, VectorStoreRecordDataField): + if not field.property_type: + logger.debug(f"Field {field.name} has not specified type, defaulting to Edm.String.") + type_ = TYPE_MAPPER_DATA[field.property_type or "default"] + fields.append( + SearchField( + name=field.name, + type=type_, + filterable=field.is_filterable, + # searchable is set first on the value of is_full_text_searchable, + # if it is None it checks the field type, if text then it is searchable + searchable=type_ in ("Edm.String", "Collection(Edm.String)") + if field.is_full_text_searchable is None + else field.is_full_text_searchable, + sortable=True, + hidden=False, + ) + ) + elif isinstance(field, VectorStoreRecordKeyField): + assert field.name # nosec + fields.append( + SimpleField( + name=field.name, + type="Edm.String", # hardcoded, only allowed type for key + key=True, + filterable=True, + searchable=True, + ) + ) + elif isinstance(field, VectorStoreRecordVectorField): + if not field.property_type: + logger.debug(f"Field {field.name} has not specified type, defaulting to Collection(Edm.Single).") + if not field.index_kind: + logger.debug(f"Field {field.name} has not specified index kind, defaulting to hnsw.") + if not field.distance_function: + logger.debug(f"Field {field.name} has not specified distance function, defaulting to cosine.") + profile_name = f"{field.name}_profile" + algo_name = f"{field.name}_algorithm" + fields.append( + SearchField( + name=field.name, + type=TYPE_MAPPER_VECTOR[field.property_type or "default"], + searchable=True, + vector_search_dimensions=field.dimensions, + vector_search_profile_name=profile_name, + hidden=False, + ) + ) + search_profiles.append( + VectorSearchProfile( + name=profile_name, + algorithm_configuration_name=algo_name, + ) + ) + algo_class, algo_params = INDEX_ALGORITHM_MAP[field.index_kind or "default"] + search_algos.append( + algo_class( + name=algo_name, + parameters=algo_params( + metric=DISTANCE_FUNCTION_MAP[field.distance_function or "default"], + ), + ) + ) + return SearchIndex( + name=collection_name, + fields=fields, + vector_search=VectorSearch(profiles=search_profiles, algorithms=search_algos), + encryption_key=encryption_key, + ) + + +class SearchIndexClientWrapper(SearchIndexClient): + """Wrapper to make sure the connection is closed when the object is deleted.""" + + def __del__(self) -> None: + """Async close connection, done when the object is deleted, used when SK creates a client.""" + with 
contextlib.suppress(Exception): + asyncio.get_running_loop().create_task(self.close()) + + +class SearchClientWrapper(SearchClient): + """Wrapper to make sure the connection is closed when the object is deleted.""" + + def __del__(self) -> None: + """Async close connection, done when the object is deleted, used when SK creates a client.""" + with contextlib.suppress(Exception): + asyncio.get_running_loop().create_task(self.close()) diff --git a/python/semantic_kernel/connectors/memory/qdrant/const.py b/python/semantic_kernel/connectors/memory/qdrant/const.py new file mode 100644 index 000000000000..749635b35f39 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/qdrant/const.py @@ -0,0 +1,25 @@ +# Copyright (c) Microsoft. All rights reserved. + +from qdrant_client.models import Datatype, Distance + +from semantic_kernel.data.const import DistanceFunction + +DISTANCE_FUNCTION_MAP = { + DistanceFunction.COSINE: Distance.COSINE, + DistanceFunction.DOT_PROD: Distance.DOT, + DistanceFunction.EUCLIDEAN: Distance.EUCLID, + DistanceFunction.MANHATTAN: Distance.MANHATTAN, + "default": Distance.COSINE, +} + +TYPE_MAPPER_VECTOR = { + "float": Datatype.FLOAT32, + "int": Datatype.UINT8, + "binary": Datatype.UINT8, + "default": Datatype.FLOAT32, +} + +__all__ = [ + "DISTANCE_FUNCTION_MAP", + "TYPE_MAPPER_VECTOR", +] diff --git a/python/semantic_kernel/connectors/memory/qdrant/qdrant_collection.py b/python/semantic_kernel/connectors/memory/qdrant/qdrant_collection.py new file mode 100644 index 000000000000..2fcad3d6fc63 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/qdrant/qdrant_collection.py @@ -0,0 +1,257 @@ +# Copyright (c) Microsoft. All rights reserved. + +import logging +import sys +from collections.abc import Mapping, Sequence +from typing import Any, ClassVar, TypeVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +from pydantic import ValidationError +from qdrant_client.async_qdrant_client import AsyncQdrantClient +from qdrant_client.models import PointStruct, VectorParams + +from semantic_kernel.connectors.memory.qdrant.const import DISTANCE_FUNCTION_MAP, TYPE_MAPPER_VECTOR +from semantic_kernel.connectors.memory.qdrant.utils import AsyncQdrantClientWrapper +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.data.vector_store_record_fields import VectorStoreRecordVectorField +from semantic_kernel.exceptions import ( + MemoryConnectorInitializationError, + VectorStoreModelValidationError, +) +from semantic_kernel.exceptions.memory_connector_exceptions import MemoryConnectorException +from semantic_kernel.kernel_types import OneOrMany +from semantic_kernel.utils.experimental_decorator import experimental_class +from semantic_kernel.utils.telemetry.user_agent import APP_INFO, prepend_semantic_kernel_to_user_agent + +logger: logging.Logger = logging.getLogger(__name__) + +TModel = TypeVar("TModel") +TKey = TypeVar("TKey", str, int) + + +@experimental_class +class QdrantCollection(VectorStoreRecordCollection[str | int, TModel]): + """A QdrantCollection is a memory collection that uses Qdrant as the backend.""" + + qdrant_client: AsyncQdrantClient + named_vectors: bool + supported_key_types: ClassVar[list[str] | None] = ["str", "int"] + supported_vector_types: ClassVar[list[str] | None] = ["float", "int"] + + def 
__init__( + self, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + collection_name: str | None = None, + named_vectors: bool = True, + url: str | None = None, + api_key: str | None = None, + host: str | None = None, + port: int | None = None, + grpc_port: int | None = None, + path: str | None = None, + location: str | None = None, + prefer_grpc: bool | None = None, + client: AsyncQdrantClient | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + **kwargs: Any, + ) -> None: + """Initializes a new instance of the QdrantVectorRecordStore. + + When using qdrant client, make sure to supply url and api_key. + When using qdrant server, make sure to supply url or host and optionally port. + When using qdrant local, either supply path to use a persisted qdrant instance + or set location to ":memory:" to use an in-memory qdrant instance. + When nothing is supplied, it defaults to an in-memory qdrant instance. + You can also supply a async qdrant client directly. + + Args: + data_model_type (type[TModel]): The type of the data model. + data_model_definition (VectorStoreRecordDefinition): The model fields, optional. + collection_name (str): The name of the collection, optional. + named_vectors (bool): If true, vectors are stored with name (default: True). + url (str): The URL of the Qdrant server (default: {None}). + api_key (str): The API key for the Qdrant server (default: {None}). + host (str): The host of the Qdrant server (default: {None}). + port (int): The port of the Qdrant server (default: {None}). + grpc_port (int): The gRPC port of the Qdrant server (default: {None}). + path (str): The path of the Qdrant server (default: {None}). + location (str): The location of the Qdrant server (default: {None}). + prefer_grpc (bool): If true, gRPC will be preferred (default: {None}). + client (AsyncQdrantClient): The Qdrant client to use (default: {None}). + env_file_path (str): Use the environment settings file as a fallback to environment variables. + env_file_encoding (str): The encoding of the environment settings file. + **kwargs: Additional keyword arguments passed to the client constructor. 
+ + """ + if client: + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + qdrant_client=client, # type: ignore + named_vectors=named_vectors, # type: ignore + ) + return + + from semantic_kernel.connectors.memory.qdrant.qdrant_settings import QdrantSettings + + try: + settings = QdrantSettings.create( + url=url, + api_key=api_key, + host=host, + port=port, + grpc_port=grpc_port, + path=path, + location=location, + prefer_grpc=prefer_grpc, + env_file_path=env_file_path, + env_file_encoding=env_file_encoding, + ) + except ValidationError as ex: + raise MemoryConnectorInitializationError("Failed to create Qdrant settings.", ex) from ex + if APP_INFO: + kwargs.setdefault("metadata", {}) + kwargs["metadata"] = prepend_semantic_kernel_to_user_agent(kwargs["metadata"]) + try: + client = AsyncQdrantClientWrapper(**settings.model_dump(exclude_none=True), **kwargs) + except ValueError as ex: + raise MemoryConnectorInitializationError("Failed to create Qdrant client.", ex) from ex + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + qdrant_client=client, + named_vectors=named_vectors, + ) + + @override + async def _inner_upsert( + self, + records: Sequence[PointStruct], + **kwargs: Any, + ) -> Sequence[TKey]: + await self.qdrant_client.upsert( + collection_name=self.collection_name, + points=records, + **kwargs, + ) + return [record.id for record in records] + + @override + async def _inner_get(self, keys: Sequence[TKey], **kwargs: Any) -> OneOrMany[Any] | None: + if "with_vectors" not in kwargs: + kwargs["with_vectors"] = True + return await self.qdrant_client.retrieve( + collection_name=self.collection_name, + ids=keys, + **kwargs, + ) + + @override + async def _inner_delete(self, keys: Sequence[TKey], **kwargs: Any) -> None: + await self.qdrant_client.delete( + collection_name=self.collection_name, + points_selector=keys, + **kwargs, + ) + + @override + def _serialize_dicts_to_store_models( + self, + records: Sequence[dict[str, Any]], + **kwargs: Any, + ) -> Sequence[PointStruct]: + return [ + PointStruct( + id=record.pop(self._key_field_name), + vector=record.pop(self.data_model_definition.vector_field_names[0]) + if not self.named_vectors + else {field: record.pop(field) for field in self.data_model_definition.vector_field_names}, + payload=record, + ) + for record in records + ] + + @override + def _deserialize_store_models_to_dicts( + self, + records: Sequence[PointStruct], + **kwargs: Any, + ) -> Sequence[dict[str, Any]]: + return [ + { + self._key_field_name: record.id, + **(record.payload if record.payload else {}), + **( + record.vector + if isinstance(record.vector, dict) + else {self.data_model_definition.vector_field_names[0]: record.vector} + ), + } + for record in records + ] + + @override + async def create_collection(self, **kwargs) -> None: + """Create a new collection in Azure AI Search. + + Args: + **kwargs: Additional keyword arguments. + You can supply all keyword arguments supported by the QdrantClient.create_collection method. + This method creates the vectors_config automatically when not supplied, other params are not set. + Collection name will be set to the collection_name property, cannot be overridden. 
+ """ + if "vectors_config" not in kwargs: + vectors_config: VectorParams | Mapping[str, VectorParams] = {} + if self.named_vectors: + for field in self.data_model_definition.vector_field_names: + vector = self.data_model_definition.fields[field] + assert isinstance(vector, VectorStoreRecordVectorField) # nosec + if not vector.dimensions: + raise MemoryConnectorException("Vector field must have dimensions.") + vectors_config[field] = VectorParams( + size=vector.dimensions, + distance=DISTANCE_FUNCTION_MAP[vector.distance_function or "default"], + datatype=TYPE_MAPPER_VECTOR[vector.property_type or "default"], + ) + else: + vector = self.data_model_definition.fields[self.data_model_definition.vector_field_names[0]] + assert isinstance(vector, VectorStoreRecordVectorField) # nosec + if not vector.dimensions: + raise MemoryConnectorException("Vector field must have dimensions.") + vectors_config = VectorParams( + size=vector.dimensions, + distance=DISTANCE_FUNCTION_MAP[vector.distance_function or "default"], + datatype=TYPE_MAPPER_VECTOR[vector.property_type or "default"], + ) + kwargs["vectors_config"] = vectors_config + if "collection_name" not in kwargs: + kwargs["collection_name"] = self.collection_name + await self.qdrant_client.create_collection(**kwargs) + + @override + async def does_collection_exist(self, **kwargs) -> bool: + return await self.qdrant_client.collection_exists(self.collection_name, **kwargs) + + @override + async def delete_collection(self, **kwargs) -> None: + await self.qdrant_client.delete_collection(self.collection_name, **kwargs) + + def _validate_data_model(self): + """Internal function that should be overloaded by child classes to validate datatypes, etc. + + This should take the VectorStoreRecordDefinition from the item_type and validate it against the store. + + Checks should include, allowed naming of parameters, allowed data types, allowed vector dimensions. + """ + super()._validate_data_model() + if len(self.data_model_definition.vector_field_names) > 1 and not self.named_vectors: + raise VectorStoreModelValidationError("Only one vector field is allowed when not using named vectors.") diff --git a/python/semantic_kernel/connectors/memory/qdrant/qdrant_settings.py b/python/semantic_kernel/connectors/memory/qdrant/qdrant_settings.py new file mode 100644 index 000000000000..e80fb4fe3fc0 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/qdrant/qdrant_settings.py @@ -0,0 +1,42 @@ +# Copyright (c) Microsoft. All rights reserved. 
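To tie the Qdrant collection above together, a sketch (not taken from the patch) that runs entirely in memory; MyDataModelList is the list-based sample model from new_memory.py above, and the vector is a placeholder for a real embedding:

from semantic_kernel.connectors.memory.qdrant.qdrant_collection import QdrantCollection


async def qdrant_quickstart() -> None:
    collection = QdrantCollection[MyDataModelList](
        data_model_type=MyDataModelList,
        collection_name="test",
        location=":memory:",  # local in-memory Qdrant, no server required
        named_vectors=False,  # store a single unnamed vector per point
    )
    async with collection:
        await collection.create_collection_if_not_exists()
        keys = await collection.upsert_batch(
            [MyDataModelList(content="My text", vector=[0.1] * 1536)]  # placeholder 1536-dim vector
        )
        print(keys)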
+ +from typing import ClassVar + +from pydantic import HttpUrl, SecretStr, model_validator + +from semantic_kernel.kernel_pydantic import KernelBaseSettings +from semantic_kernel.utils.experimental_decorator import experimental_class + +IN_MEMORY_STRING = ":memory:" + + +@experimental_class +class QdrantSettings(KernelBaseSettings): + """Qdrant settings currently used by the Qdrant Vector Record Store.""" + + env_prefix: ClassVar[str] = "QDRANT_" + + url: HttpUrl | None = None + api_key: SecretStr | None = None + host: str | None = None + port: int | None = None + grpc_port: int | None = None + path: str | None = None + location: str | None = None + prefer_grpc: bool = False + + @model_validator(mode="before") + def validate_settings(cls, values): + """Validate the settings.""" + if "url" not in values and "host" not in values and "path" not in values and "location" not in values: + values["location"] = IN_MEMORY_STRING + return values + + def model_dump(self, **kwargs): + """Dump the model.""" + dump = super().model_dump(**kwargs) + if "api_key" in dump: + dump["api_key"] = dump["api_key"].get_secret_value() + if "url" in dump: + dump["url"] = str(dump["url"]) + return dump diff --git a/python/semantic_kernel/connectors/memory/qdrant/qdrant_store.py b/python/semantic_kernel/connectors/memory/qdrant/qdrant_store.py new file mode 100644 index 000000000000..85f0a1d7eaf5 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/qdrant/qdrant_store.py @@ -0,0 +1,135 @@ +# Copyright (c) Microsoft. All rights reserved. + +import logging +import sys +from collections.abc import Sequence +from typing import TYPE_CHECKING, Any, TypeVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +from pydantic import ValidationError +from qdrant_client.async_qdrant_client import AsyncQdrantClient + +from semantic_kernel.connectors.memory.qdrant.qdrant_collection import QdrantCollection +from semantic_kernel.data.vector_store import VectorStore +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.exceptions import MemoryConnectorInitializationError +from semantic_kernel.utils.experimental_decorator import experimental_class +from semantic_kernel.utils.telemetry.user_agent import APP_INFO, prepend_semantic_kernel_to_user_agent + +if TYPE_CHECKING: + from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection + +logger: logging.Logger = logging.getLogger(__name__) + +TModel = TypeVar("TModel") +TKey = TypeVar("TKey", str, int) + + +@experimental_class +class QdrantStore(VectorStore): + """A QdrantStore is a memory store that uses Qdrant as the backend.""" + + qdrant_client: AsyncQdrantClient + + def __init__( + self, + url: str | None = None, + api_key: str | None = None, + host: str | None = None, + port: int | None = None, + grpc_port: int | None = None, + path: str | None = None, + location: str | None = None, + prefer_grpc: bool | None = None, + client: AsyncQdrantClient | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + **kwargs: Any, + ) -> None: + """Initializes a new instance of the QdrantVectorRecordStore. + + When using qdrant client, make sure to supply url and api_key. + When using qdrant server, make sure to supply url or host and optionally port. 
+ When using qdrant local, either supply path to use a persisted qdrant instance + or set location to ":memory:" to use an in-memory qdrant instance. + When nothing is supplied, it defaults to an in-memory qdrant instance. + You can also supply a async qdrant client directly. + + Args: + url (str): The URL of the Qdrant server (default: {None}). + api_key (str): The API key for the Qdrant server (default: {None}). + host (str): The host of the Qdrant server (default: {None}). + port (int): The port of the Qdrant server (default: {None}). + grpc_port (int): The gRPC port of the Qdrant server (default: {None}). + path (str): The path of the Qdrant server (default: {None}). + location (str): The location of the Qdrant server (default: {None}). + prefer_grpc (bool): If true, gRPC will be preferred (default: {None}). + client (AsyncQdrantClient): The Qdrant client to use (default: {None}). + env_file_path (str): Use the environment settings file as a fallback to environment variables. + env_file_encoding (str): The encoding of the environment settings file. + **kwargs: Additional keyword arguments passed to the client constructor. + + """ + if client: + super().__init__(qdrant_client=client, **kwargs) + return + + from semantic_kernel.connectors.memory.qdrant.qdrant_settings import QdrantSettings + + try: + settings = QdrantSettings.create( + url=url, + api_key=api_key, + host=host, + port=port, + grpc_port=grpc_port, + path=path, + location=location, + prefer_grpc=prefer_grpc, + env_file_path=env_file_path, + env_file_encoding=env_file_encoding, + ) + except ValidationError as ex: + raise MemoryConnectorInitializationError("Failed to create Qdrant settings.", ex) from ex + if APP_INFO: + kwargs.setdefault("metadata", {}) + kwargs["metadata"] = prepend_semantic_kernel_to_user_agent(kwargs["metadata"]) + try: + client = AsyncQdrantClient(**settings.model_dump(exclude_none=True), **kwargs) + except ValueError as ex: + raise MemoryConnectorInitializationError("Failed to create Qdrant client.", ex) from ex + super().__init__(qdrant_client=client) + + def get_collection( + self, + collection_name: str, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + **kwargs: Any, + ) -> "VectorStoreRecordCollection": + """Get a QdrantCollection tied to a collection. + + Args: + collection_name (str): The name of the collection. + data_model_type (type[TModel]): The type of the data model. + data_model_definition (VectorStoreRecordDefinition | None): The model fields, optional. + **kwargs: Additional keyword arguments, passed to the collection constructor. + """ + if collection_name not in self.vector_record_collections: + self.vector_record_collections[collection_name] = QdrantCollection[data_model_type]( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + client=self.qdrant_client, + **kwargs, + ) + return self.vector_record_collections[collection_name] + + @override + async def list_collection_names(self, **kwargs: Any) -> Sequence[str]: + collections = await self.qdrant_client.get_collections() + return [collection.name for collection in collections.collections] diff --git a/python/semantic_kernel/connectors/memory/qdrant/utils.py b/python/semantic_kernel/connectors/memory/qdrant/utils.py new file mode 100644 index 000000000000..34fbe4b3f6a2 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/qdrant/utils.py @@ -0,0 +1,15 @@ +# Copyright (c) Microsoft. All rights reserved. 
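# A usage sketch for the QdrantStore above. The record class is hypothetical and the
# VectorStoreRecord* field arguments are assumptions; with no connection arguments,
# QdrantSettings falls back to the in-memory ":memory:" location.
import asyncio
from dataclasses import dataclass
from typing import Annotated

from semantic_kernel.connectors.memory.qdrant.qdrant_store import QdrantStore
from semantic_kernel.data import (
    VectorStoreRecordDataField,
    VectorStoreRecordKeyField,
    VectorStoreRecordVectorField,
    vectorstoremodel,
)


@vectorstoremodel
@dataclass
class HotelRecord:  # hypothetical data model
    hotel_id: Annotated[str, VectorStoreRecordKeyField()]
    description: Annotated[str, VectorStoreRecordDataField()]
    description_vector: Annotated[list[float] | None, VectorStoreRecordVectorField(dimensions=3)] = None


async def main() -> None:
    store = QdrantStore()  # no connection arguments, so an in-memory Qdrant instance is used
    collection = store.get_collection("hotels", data_model_type=HotelRecord)
    await collection.create_collection_if_not_exists()
    print(await store.list_collection_names())


asyncio.run(main())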
+ +import asyncio +import contextlib + +from qdrant_client.async_qdrant_client import AsyncQdrantClient + + +class AsyncQdrantClientWrapper(AsyncQdrantClient): + """Wrapper to make sure the connection is closed when the object is deleted.""" + + def __del__(self) -> None: + """Async close connection, done when the object is deleted, used when SK creates a client.""" + with contextlib.suppress(Exception): + asyncio.get_running_loop().create_task(self.close()) diff --git a/python/semantic_kernel/connectors/memory/redis/const.py b/python/semantic_kernel/connectors/memory/redis/const.py new file mode 100644 index 000000000000..490b0915a190 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/redis/const.py @@ -0,0 +1,39 @@ +# Copyright (c) Microsoft. All rights reserved. + + +from enum import Enum + +from redis.commands.search.indexDefinition import IndexType + +from semantic_kernel.data.const import DistanceFunction + + +class RedisCollectionTypes(str, Enum): + JSON = "json" + HASHSET = "hashset" + + +INDEX_TYPE_MAP = { + RedisCollectionTypes.JSON: IndexType.JSON, + RedisCollectionTypes.HASHSET: IndexType.HASH, +} + +DISTANCE_FUNCTION_MAP = { + DistanceFunction.COSINE: "COSINE", + DistanceFunction.DOT_PROD: "IP", + DistanceFunction.EUCLIDEAN: "L2", + "default": "COSINE", +} + +TYPE_MAPPER_VECTOR = { + "float": "FLOAT32", + "int": "FLOAT16", + "binary": "FLOAT16", + "ndarray": "FLOAT32", + "default": "FLOAT32", +} + +__all__ = [ + "DISTANCE_FUNCTION_MAP", + "TYPE_MAPPER_VECTOR", +] diff --git a/python/semantic_kernel/connectors/memory/redis/redis_collection.py b/python/semantic_kernel/connectors/memory/redis/redis_collection.py new file mode 100644 index 000000000000..e86d32b21bda --- /dev/null +++ b/python/semantic_kernel/connectors/memory/redis/redis_collection.py @@ -0,0 +1,330 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import asyncio +import json +import logging +import sys +from collections.abc import Sequence +from typing import Any, ClassVar, TypeVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +import numpy as np +from pydantic import ValidationError +from redis.asyncio.client import Redis +from redis.commands.search.indexDefinition import IndexDefinition + +from semantic_kernel.connectors.memory.redis.const import INDEX_TYPE_MAP, RedisCollectionTypes +from semantic_kernel.connectors.memory.redis.utils import RedisWrapper, data_model_definition_to_redis_fields +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) +from semantic_kernel.exceptions.memory_connector_exceptions import ( + MemoryConnectorException, + MemoryConnectorInitializationError, +) +from semantic_kernel.utils.experimental_decorator import experimental_class + +logger: logging.Logger = logging.getLogger(__name__) + +TModel = TypeVar("TModel") + + +@experimental_class +class RedisCollection(VectorStoreRecordCollection[str, TModel]): + """A vector store record collection implementation using Redis.""" + + redis_database: Redis + prefix_collection_name_to_key_names: bool + collection_type: RedisCollectionTypes + supported_key_types: ClassVar[list[str] | None] = ["str"] + supported_vector_types: ClassVar[list[str] | None] = ["float"] + + def __init__( + self, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + collection_name: str | None = None, + redis_database: Redis | None = None, + prefix_collection_name_to_key_names: bool = False, + collection_type: RedisCollectionTypes = RedisCollectionTypes.HASHSET, + connection_string: str | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + ) -> None: + """RedisMemoryStore is an abstracted interface to interact with a Redis node connection. + + See documentation about connections: https://redis-py.readthedocs.io/en/stable/connections.html + See documentation about vector attributes: https://redis.io/docs/stack/search/reference/vectors. 
+ + """ + if redis_database: + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + redis_database=redis_database, + prefix_collection_name_to_key_names=prefix_collection_name_to_key_names, + collection_type=collection_type, + ) + return + try: + from semantic_kernel.connectors.memory.redis.redis_settings import RedisSettings + + redis_settings = RedisSettings.create( + connection_string=connection_string, + env_file_path=env_file_path, + env_file_encoding=env_file_encoding, + ) + except ValidationError as ex: + raise MemoryConnectorInitializationError("Failed to create Redis settings.", ex) from ex + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + redis_database=RedisWrapper.from_url(redis_settings.connection_string.get_secret_value()), + prefix_collection_name_to_key_names=prefix_collection_name_to_key_names, + collection_type=collection_type, + ) + + def _get_redis_key(self, key: str) -> str: + if self.prefix_collection_name_to_key_names: + return f"{self.collection_name}:{key}" + return key + + def _unget_redis_key(self, key: str) -> str: + if self.prefix_collection_name_to_key_names and ":" in key: + return key[len(self.collection_name) + 1 :] + return key + + @override + async def create_collection(self, **kwargs) -> None: + """Create a new index in Redis. + + Args: + **kwargs: Additional keyword arguments. + fields (list[Fields]): The fields to create the index with, when not supplied, + these are created from the data_model_definition. + index_definition (IndexDefinition): The search index to create, if this is supplied + this is used instead of a index created based on the definition. + other kwargs are passed to the create_index method. 
+ """ + if (index_definition := kwargs.pop("index_definition", None)) and (fields := kwargs.pop("fields", None)): + if isinstance(index_definition, IndexDefinition): + await self.redis_database.ft(self.collection_name).create_index( + fields, definition=index_definition, **kwargs + ) + return + raise MemoryConnectorException("Invalid index type supplied.") + fields = data_model_definition_to_redis_fields(self.data_model_definition, self.collection_type) + index_definition = IndexDefinition( + prefix=f"{self.collection_name}:", index_type=INDEX_TYPE_MAP[self.collection_type] + ) + await self.redis_database.ft(self.collection_name).create_index(fields, definition=index_definition, **kwargs) + + @override + async def does_collection_exist(self, **kwargs) -> bool: + try: + await self.redis_database.ft(self.collection_name).info() + return True + except Exception: + return False + + @override + async def delete_collection(self, **kwargs) -> None: + exists = await self.does_collection_exist() + if exists: + await self.redis_database.ft(self.collection_name).dropindex(**kwargs) + else: + logger.debug("Collection does not exist, skipping deletion.") + + +@experimental_class +class RedisHashsetCollection(RedisCollection): + """A vector store record collection implementation using Redis Hashsets.""" + + def __init__( + self, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + collection_name: str | None = None, + redis_database: Redis | None = None, + prefix_collection_name_to_key_names: bool = False, + connection_string: str | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + **kwargs: Any, + ) -> None: + """RedisMemoryStore is an abstracted interface to interact with a Redis node connection. + + See documentation about connections: https://redis-py.readthedocs.io/en/stable/connections.html + See documentation about vector attributes: https://redis.io/docs/stack/search/reference/vectors. 
+ + """ + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + redis_database=redis_database, + prefix_collection_name_to_key_names=prefix_collection_name_to_key_names, + collection_type=RedisCollectionTypes.HASHSET, + connection_string=connection_string, + env_file_path=env_file_path, + env_file_encoding=env_file_encoding, + **kwargs, + ) + + @override + async def _inner_upsert(self, records: Sequence[Any], **kwargs: Any) -> Sequence[str]: + return await asyncio.gather(*[self._single_upsert(record) for record in records]) + + async def _single_upsert(self, upsert_record: Any) -> str: + await self.redis_database.hset(**upsert_record) + return self._unget_redis_key(upsert_record["name"]) + + @override + async def _inner_get(self, keys: Sequence[str], **kwargs) -> Sequence[dict[bytes, bytes]] | None: + results = await asyncio.gather(*[self.redis_database.hgetall(self._get_redis_key(key)) for key in keys]) + return [result for result in results if result] + + @override + async def _inner_delete(self, keys: Sequence[str], **kwargs: Any) -> None: + await self.redis_database.delete(*[self._get_redis_key(key) for key in keys]) + + @override + def _serialize_dicts_to_store_models( + self, + records: Sequence[dict[str, Any]], + **kwargs: Any, + ) -> Sequence[dict[str, Any]]: + """Serialize the dict to a Redis store model.""" + results = [] + for record in records: + result = {"mapping": {}} + metadata = {} + for name, field in self.data_model_definition.fields.items(): + if isinstance(field, VectorStoreRecordVectorField): + if not isinstance(record[name], np.ndarray): + record[name] = np.array(record[name]) + result["mapping"][name] = record[name].tobytes() + continue + if isinstance(field, VectorStoreRecordKeyField): + result["name"] = self._get_redis_key(record[name]) + continue + metadata[name] = record[field.name] + result["mapping"]["metadata"] = json.dumps(metadata) + results.append(result) + return results + + @override + def _deserialize_store_models_to_dicts( + self, + records: Sequence[dict[bytes, bytes]], + keys: Sequence[str], + **kwargs: Any, + ) -> Sequence[dict[str, Any]]: + results = [] + for key, record in zip(keys, records): + if record: + flattened = json.loads(record[b"metadata"]) + for name, field in self.data_model_definition.fields.items(): + if isinstance(field, VectorStoreRecordKeyField): + flattened[name] = self._unget_redis_key(key) + if isinstance(field, VectorStoreRecordVectorField): + flattened[name] = np.frombuffer(record[name.encode()]).tolist() + results.append(flattened) + return results + + +@experimental_class +class RedisJsonCollection(RedisCollection): + """A vector store record collection implementation using Redis Json.""" + + def __init__( + self, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + collection_name: str | None = None, + redis_database: Redis | None = None, + prefix_collection_name_to_key_names: bool = False, + connection_string: str | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + **kwargs: Any, + ) -> None: + """RedisMemoryStore is an abstracted interface to interact with a Redis node connection. + + See documentation about connections: https://redis-py.readthedocs.io/en/stable/connections.html + See documentation about vector attributes: https://redis.io/docs/stack/search/reference/vectors. 
+ + """ + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + redis_database=redis_database, + prefix_collection_name_to_key_names=prefix_collection_name_to_key_names, + collection_type=RedisCollectionTypes.JSON, + connection_string=connection_string, + env_file_path=env_file_path, + env_file_encoding=env_file_encoding, + **kwargs, + ) + + @override + async def _inner_upsert(self, records: Sequence[Any], **kwargs: Any) -> Sequence[str]: + return await asyncio.gather(*[self._single_upsert(record) for record in records]) + + async def _single_upsert(self, upsert_record: Any) -> str: + await self.redis_database.json().set(upsert_record["name"], "$", upsert_record["value"]) + return self._unget_redis_key(upsert_record["name"]) + + @override + async def _inner_get(self, keys: Sequence[str], **kwargs) -> Sequence[dict[bytes, bytes]] | None: + results = await self.redis_database.json().mget([self._get_redis_key(key) for key in keys], "$", **kwargs) + return [result[0] for result in results if result] + + @override + async def _inner_delete(self, keys: Sequence[str], **kwargs: Any) -> None: + await asyncio.gather(*[self.redis_database.json().delete(key, **kwargs) for key in keys]) + + @override + def _serialize_dicts_to_store_models( + self, + records: Sequence[dict[str, Any]], + **kwargs: Any, + ) -> Sequence[dict[str, Any]]: + """Serialize the dict to a Redis store model.""" + results = [] + for record in records: + result = {"value": {}} + for name, field in self.data_model_definition.fields.items(): + if isinstance(field, VectorStoreRecordKeyField): + result["name"] = self._get_redis_key(record[name]) + continue + if isinstance(field, VectorStoreRecordVectorField): + if isinstance(record[name], np.ndarray): + record[name] = record[name].tolist() + result["value"][name] = record[name] + result["value"][name] = record[name] + results.append(result) + return results + + @override + def _deserialize_store_models_to_dicts( + self, + records: Sequence[dict[str, Any]], + keys: Sequence[str], + **kwargs: Any, + ) -> Sequence[dict[str, Any]]: + results = [] + for key, record in zip(keys, records): + record[self.data_model_definition.key_field_name] = self._unget_redis_key(key) + results.append(record) + return results diff --git a/python/semantic_kernel/connectors/memory/redis/redis_store.py b/python/semantic_kernel/connectors/memory/redis/redis_store.py new file mode 100644 index 000000000000..e0b5d8991ee3 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/redis/redis_store.py @@ -0,0 +1,105 @@ +# Copyright (c) Microsoft. All rights reserved. 
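# A sketch (hypothetical field names and values) of the store shapes the two Redis
# serializers above produce: the hashset variant packs vectors as raw bytes plus a JSON
# "metadata" blob, while the JSON variant keeps the record as a plain document.
import json

import numpy as np

record = {"id": "1", "text": "hello", "vector": [0.1, 0.2, 0.3]}  # hypothetical record as a dict

# RedisHashsetCollection._serialize_dicts_to_store_models yields roughly:
hashset_shape = {
    "name": "my_collection:1",  # key, prefixed when prefix_collection_name_to_key_names=True
    "mapping": {
        "vector": np.array(record["vector"]).tobytes(),
        "metadata": json.dumps({"text": "hello"}),
    },
}

# RedisJsonCollection._serialize_dicts_to_store_models yields roughly:
json_shape = {
    "name": "my_collection:1",
    "value": {"text": "hello", "vector": [0.1, 0.2, 0.3]},
}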
+
+import logging
+import sys
+from collections.abc import Sequence
+from typing import Any, TypeVar
+
+if sys.version_info >= (3, 12):
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
+
+from pydantic import ValidationError
+from redis.asyncio.client import Redis
+
+from semantic_kernel.connectors.memory.redis.const import RedisCollectionTypes
+from semantic_kernel.connectors.memory.redis.redis_collection import RedisHashsetCollection, RedisJsonCollection
+from semantic_kernel.connectors.memory.redis.utils import RedisWrapper
+from semantic_kernel.data.vector_store import VectorStore
+from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition
+from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection
+from semantic_kernel.exceptions.memory_connector_exceptions import MemoryConnectorInitializationError
+from semantic_kernel.utils.experimental_decorator import experimental_class
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+TModel = TypeVar("TModel")
+
+
+@experimental_class
+class RedisStore(VectorStore):
+    """Create a Redis Vector Store."""
+
+    redis_database: Redis
+
+    def __init__(
+        self,
+        connection_string: str | None = None,
+        env_file_path: str | None = None,
+        env_file_encoding: str | None = None,
+        redis_database: Redis | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """RedisStore is an abstracted interface to interact with a Redis node connection.
+
+        See documentation about connections: https://redis-py.readthedocs.io/en/stable/connections.html
+        See documentation about vector attributes: https://redis.io/docs/stack/search/reference/vectors.
+
+        """
+        if redis_database:
+            super().__init__(redis_database=redis_database)
+            return
+        try:
+            from semantic_kernel.connectors.memory.redis.redis_settings import RedisSettings
+
+            redis_settings = RedisSettings.create(
+                connection_string=connection_string,
+                env_file_path=env_file_path,
+                env_file_encoding=env_file_encoding,
+            )
+        except ValidationError as ex:
+            raise MemoryConnectorInitializationError("Failed to create Redis settings.", ex) from ex
+        super().__init__(redis_database=RedisWrapper.from_url(redis_settings.connection_string.get_secret_value()))
+
+    @override
+    async def list_collection_names(self, **kwargs) -> Sequence[str]:
+        return [name.decode() for name in await self.redis_database.execute_command("FT._LIST")]
+
+    @override
+    def get_collection(
+        self,
+        collection_name: str,
+        data_model_type: type[TModel],
+        data_model_definition: VectorStoreRecordDefinition | None = None,
+        collection_type: RedisCollectionTypes = RedisCollectionTypes.HASHSET,
+        **kwargs: Any,
+    ) -> "VectorStoreRecordCollection":
+        """Get a RedisCollection.
+
+        Args:
+            collection_name (str): The name of the collection.
+            data_model_type (type[TModel]): The type of the data model.
+            data_model_definition (VectorStoreRecordDefinition | None): The model fields, optional.
+            collection_type (RedisCollectionTypes): The type of the collection, can be JSON or HASHSET.
+
+            **kwargs: Additional keyword arguments, passed to the collection constructor.
+ """ + if collection_name not in self.vector_record_collections: + if collection_type == RedisCollectionTypes.HASHSET: + self.vector_record_collections[collection_name] = RedisHashsetCollection( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + redis_database=self.redis_database, + **kwargs, + ) + else: + self.vector_record_collections[collection_name] = RedisJsonCollection( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + redis_database=self.redis_database, + **kwargs, + ) + return self.vector_record_collections[collection_name] diff --git a/python/semantic_kernel/connectors/memory/redis/utils.py b/python/semantic_kernel/connectors/memory/redis/utils.py index babadccad08b..0c0ccbee56ea 100644 --- a/python/semantic_kernel/connectors/memory/redis/utils.py +++ b/python/semantic_kernel/connectors/memory/redis/utils.py @@ -1,17 +1,29 @@ # Copyright (c) Microsoft. All rights reserved. +import asyncio +import contextlib import json from datetime import datetime from typing import Any import numpy as np -from redis import Redis +from redis.asyncio.client import Redis from redis.commands.search.document import Document - +from redis.commands.search.field import Field as RedisField +from redis.commands.search.field import NumericField, TagField, TextField, VectorField + +from semantic_kernel.connectors.memory.azure_ai_search.const import DISTANCE_FUNCTION_MAP +from semantic_kernel.connectors.memory.redis.const import TYPE_MAPPER_VECTOR, RedisCollectionTypes +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) from semantic_kernel.memory.memory_record import MemoryRecord -def get_redis_key(collection_name: str, record_id: str) -> str: +def get_redis_key(collection_name: str, record_id: str) -> str: # pragma: no cover """Returns the Redis key for an element called record_id within collection_name. Args: @@ -24,7 +36,7 @@ def get_redis_key(collection_name: str, record_id: str) -> str: return f"{collection_name}:{record_id}" -def split_redis_key(redis_key: str) -> tuple[str, str]: +def split_redis_key(redis_key: str) -> tuple[str, str]: # pragma: no cover """Split a Redis key into its collection name and record ID. 
Args: @@ -37,7 +49,7 @@ def split_redis_key(redis_key: str) -> tuple[str, str]: return collection, record_id -def serialize_record_to_redis(record: MemoryRecord, vector_type: np.dtype) -> dict[str, Any]: +def serialize_record_to_redis(record: MemoryRecord, vector_type: np.dtype) -> dict[str, Any]: # pragma: no cover """Serialize a MemoryRecord to Redis fields.""" all_metadata = { "is_reference": record._is_reference, @@ -56,7 +68,9 @@ def serialize_record_to_redis(record: MemoryRecord, vector_type: np.dtype) -> di } -def deserialize_redis_to_record(fields: dict[str, Any], vector_type: np.dtype, with_embedding: bool) -> MemoryRecord: +def deserialize_redis_to_record( + fields: dict[str, Any], vector_type: np.dtype, with_embedding: bool +) -> MemoryRecord: # pragma: no cover """Deserialize Redis fields to a MemoryRecord.""" metadata = json.loads(fields[b"metadata"]) record = MemoryRecord( @@ -81,7 +95,7 @@ def deserialize_redis_to_record(fields: dict[str, Any], vector_type: np.dtype, w def deserialize_document_to_record( database: Redis, doc: Document, vector_type: np.dtype, with_embedding: bool -) -> MemoryRecord: +) -> MemoryRecord: # pragma: no cover """Deserialize document to a MemoryRecord.""" # Document's ID refers to the Redis key redis_key = doc["id"] @@ -107,3 +121,68 @@ def deserialize_document_to_record( record._embedding = np.frombuffer(eb, dtype=vector_type).astype(float) return record + + +class RedisWrapper(Redis): + """Wrapper to make sure the connection is closed when the object is deleted.""" + + def __del__(self) -> None: + """Close connection, done when the object is deleted, used when SK creates a client.""" + with contextlib.suppress(Exception): + asyncio.get_running_loop().create_task(self.close()) + + +def data_model_definition_to_redis_fields( + data_model_definition: VectorStoreRecordDefinition, collection_type: RedisCollectionTypes +) -> list[RedisField]: + """Create a list of fields for Redis from a data_model_definition.""" + fields: list[RedisField] = [] + for name, field in data_model_definition.fields.items(): + if isinstance(field, VectorStoreRecordKeyField): + continue + if collection_type == RedisCollectionTypes.HASHSET: + fields.append(_field_to_redis_field_hashset(name, field)) + elif collection_type == RedisCollectionTypes.JSON: + fields.append(_field_to_redis_field_json(name, field)) + return fields + + +def _field_to_redis_field_hashset( + name: str, field: VectorStoreRecordVectorField | VectorStoreRecordDataField +) -> RedisField: + if isinstance(field, VectorStoreRecordVectorField): + return VectorField( + name=name, + algorithm=field.index_kind.value.upper() if field.index_kind else "HNSW", + attributes={ + "type": TYPE_MAPPER_VECTOR[field.property_type or "default"], + "dim": field.dimensions, + "distance_metric": DISTANCE_FUNCTION_MAP[field.distance_function or "default"], + }, + ) + if field.property_type in ["int", "float"]: + return NumericField(name=name) + if field.is_full_text_searchable: + return TextField(name=name) + return TagField(name=name) + + +def _field_to_redis_field_json( + name: str, field: VectorStoreRecordVectorField | VectorStoreRecordDataField +) -> RedisField: + if isinstance(field, VectorStoreRecordVectorField): + return VectorField( + name=f"$.{name}", + algorithm=field.index_kind.value.upper() if field.index_kind else "HNSW", + attributes={ + "type": TYPE_MAPPER_VECTOR[field.property_type or "default"], + "dim": field.dimensions, + "distance_metric": DISTANCE_FUNCTION_MAP[field.distance_function or "default"], + }, + 
as_name=name, + ) + if field.property_type in ["int", "float"]: + return NumericField(name=f"$.{name}", as_name=name) + if field.is_full_text_searchable: + return TextField(name=f"$.{name}", as_name=name) + return TagField(name=f"$.{name}", as_name=name) diff --git a/python/semantic_kernel/connectors/memory/volatile/__init__.py b/python/semantic_kernel/connectors/memory/volatile/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/semantic_kernel/connectors/memory/volatile/volatile_collection.py b/python/semantic_kernel/connectors/memory/volatile/volatile_collection.py new file mode 100644 index 000000000000..2b436e7e6ea5 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/volatile/volatile_collection.py @@ -0,0 +1,76 @@ +# Copyright (c) Microsoft. All rights reserved. + +import sys +from collections.abc import Mapping, Sequence +from typing import Any, ClassVar, TypeVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +from pydantic import Field + +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.kernel_types import OneOrMany + +KEY_TYPES = str | int | float + +TModel = TypeVar("TModel") + + +class VolatileCollection(VectorStoreRecordCollection[KEY_TYPES, TModel]): + """Volatile Collection.""" + + inner_storage: dict[KEY_TYPES, dict] = Field(default_factory=dict) + supported_key_types: ClassVar[list[str] | None] = ["str", "int", "float"] + + def __init__( + self, + collection_name: str, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + ): + """Create a Volatile Collection.""" + super().__init__( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + ) + + @override + async def _inner_delete(self, keys: Sequence[KEY_TYPES], **kwargs: Any) -> None: + for key in keys: + self.inner_storage.pop(key, None) + + @override + async def _inner_get(self, keys: Sequence[KEY_TYPES], **kwargs: Any) -> Any | OneOrMany[TModel] | None: + return [self.inner_storage[key] for key in keys if key in self.inner_storage] + + @override + async def _inner_upsert(self, records: Sequence[Any], **kwargs: Any) -> Sequence[KEY_TYPES]: + updated_keys = [] + for record in records: + key = record[self._key_field_name] if isinstance(record, Mapping) else getattr(record, self._key_field_name) + self.inner_storage[key] = record + updated_keys.append(key) + return updated_keys + + def _deserialize_store_models_to_dicts(self, records: Sequence[Any], **kwargs: Any) -> Sequence[dict[str, Any]]: + return records + + def _serialize_dicts_to_store_models(self, records: Sequence[dict[str, Any]], **kwargs: Any) -> Sequence[Any]: + return records + + @override + async def create_collection(self, **kwargs: Any) -> None: + pass + + @override + async def delete_collection(self, **kwargs: Any) -> None: + self.inner_storage = {} + + @override + async def does_collection_exist(self, **kwargs: Any) -> bool: + return True diff --git a/python/semantic_kernel/connectors/memory/volatile/volatile_store.py b/python/semantic_kernel/connectors/memory/volatile/volatile_store.py new file mode 100644 index 000000000000..1df7b2948373 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/volatile/volatile_store.py @@ -0,0 +1,47 @@ +# 
Copyright (c) Microsoft. All rights reserved. + +import logging +import sys +from collections.abc import Sequence +from typing import Any, TypeVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + + +from semantic_kernel.connectors.memory.volatile.volatile_collection import VolatileCollection +from semantic_kernel.data.vector_store import VectorStore +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.utils.experimental_decorator import experimental_class + +logger: logging.Logger = logging.getLogger(__name__) + +TModel = TypeVar("TModel") + + +@experimental_class +class VolatileStore(VectorStore): + """Create a Volatile Vector Store.""" + + @override + async def list_collection_names(self, **kwargs) -> Sequence[str]: + return list(self.vector_record_collections.keys()) + + @override + def get_collection( + self, + collection_name: str, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + **kwargs: Any, + ) -> "VectorStoreRecordCollection": + if collection_name not in self.vector_record_collections: + self.vector_record_collections[collection_name] = VolatileCollection( + data_model_type=data_model_type, + data_model_definition=data_model_definition, + collection_name=collection_name, + ) + return self.vector_record_collections[collection_name] diff --git a/python/semantic_kernel/data/__init__.py b/python/semantic_kernel/data/__init__.py new file mode 100644 index 000000000000..b4284c1615e0 --- /dev/null +++ b/python/semantic_kernel/data/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) Microsoft. All rights reserved. + +from semantic_kernel.data.vector_store import VectorStore +from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel +from semantic_kernel.data.vector_store_model_definition import ( + VectorStoreRecordDefinition, +) +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) + +__all__ = [ + "VectorStore", + "VectorStoreRecordCollection", + "VectorStoreRecordDataField", + "VectorStoreRecordDefinition", + "VectorStoreRecordKeyField", + "VectorStoreRecordVectorField", + "vectorstoremodel", +] diff --git a/python/semantic_kernel/data/const.py b/python/semantic_kernel/data/const.py new file mode 100644 index 000000000000..6972d9c7945e --- /dev/null +++ b/python/semantic_kernel/data/const.py @@ -0,0 +1,20 @@ +# Copyright (c) Microsoft. All rights reserved. + + +from enum import Enum + + +class IndexKind(str, Enum): + """Index kinds for similarity search.""" + + HNSW = "hnsw" + FLAT = "flat" + + +class DistanceFunction(str, Enum): + """Distance functions for similarity search.""" + + COSINE = "cosine" + DOT_PROD = "dot_prod" + EUCLIDEAN = "euclidean" + MANHATTAN = "manhattan" diff --git a/python/semantic_kernel/data/vector_store.py b/python/semantic_kernel/data/vector_store.py new file mode 100644 index 000000000000..e51f145c7168 --- /dev/null +++ b/python/semantic_kernel/data/vector_store.py @@ -0,0 +1,36 @@ +# Copyright (c) Microsoft. All rights reserved. 
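# A usage sketch for the VolatileStore above, handy for tests. The record class is
# hypothetical and uses the field annotations exported from semantic_kernel.data.
import asyncio
from dataclasses import dataclass
from typing import Annotated

from semantic_kernel.connectors.memory.volatile.volatile_store import VolatileStore
from semantic_kernel.data import VectorStoreRecordDataField, VectorStoreRecordKeyField, vectorstoremodel


@vectorstoremodel
@dataclass
class Note:  # hypothetical model: one key field plus one data field
    id: Annotated[str, VectorStoreRecordKeyField()]
    text: Annotated[str, VectorStoreRecordDataField()]


async def main() -> None:
    store = VolatileStore()
    collection = store.get_collection("notes", data_model_type=Note)
    await collection.upsert(Note(id="1", text="hello"))
    print(await collection.get("1"))             # the stored record, round-tripped through the dict path
    print(await store.list_collection_names())   # ['notes']


asyncio.run(main())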
+ + +from abc import abstractmethod +from collections.abc import Sequence +from typing import Any + +from pydantic import Field + +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.kernel_pydantic import KernelBaseModel +from semantic_kernel.utils.experimental_decorator import experimental_class + + +@experimental_class +class VectorStore(KernelBaseModel): + """Base class for vector stores.""" + + vector_record_collections: dict[str, VectorStoreRecordCollection] = Field(default_factory=dict) + + @abstractmethod + def get_collection( + self, + collection_name: str, + data_model_type: type[object], + data_model_definition: VectorStoreRecordDefinition | None = None, + **kwargs: Any, + ) -> VectorStoreRecordCollection: + """Get a vector record store.""" + ... # pragma: no cover + + @abstractmethod + async def list_collection_names(self, **kwargs) -> Sequence[str]: + """Get the names of all collections.""" + ... # pragma: no cover diff --git a/python/semantic_kernel/data/vector_store_model_decorator.py b/python/semantic_kernel/data/vector_store_model_decorator.py new file mode 100644 index 000000000000..e89fd6d7f766 --- /dev/null +++ b/python/semantic_kernel/data/vector_store_model_decorator.py @@ -0,0 +1,117 @@ +# Copyright (c) Microsoft. All rights reserved. + +import logging +from inspect import _empty, signature +from types import NoneType +from typing import Any + +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import VectorStoreRecordField, VectorStoreRecordVectorField +from semantic_kernel.exceptions.memory_connector_exceptions import VectorStoreModelException + +logger = logging.getLogger(__name__) + + +def vectorstoremodel( + cls: Any | None = None, +): + """Returns the class as a vector store model. + + This decorator makes a class a vector store model. + There are three things being checked: + - The class must have at least one field with a annotation, + of type VectorStoreRecordKeyField, VectorStoreRecordDataField or VectorStoreRecordVectorField. + - The class must have exactly one field with the VectorStoreRecordKeyField annotation. + + Optionally, when there are VectorStoreRecordDataFields that specify a embedding property name, + there must be a corresponding VectorStoreRecordVectorField with the same name. + + Args: + cls: The class to be decorated. + + Raises: + DataModelException: If the class does not implement the serialize and deserialize methods. + DataModelException: If there are no fields with a VectorStoreRecordField annotation. + DataModelException: If there are fields with no name. + DataModelException: If there is no key field. + DataModelException: If there is a field with an embedding property name but no corresponding vector field. + """ + + def wrap(cls: Any): + # get fields and annotations + cls_sig = signature(cls) + setattr(cls, "__kernel_vectorstoremodel__", True) + setattr(cls, "__kernel_vectorstoremodel_definition__", _parse_signature_to_definition(cls_sig.parameters)) + + return cls + + # See if we're being called as @vectorstoremodel or @vectorstoremodel(). + if cls is None: + # We're called with parens. + return wrap + + # We're called as @vectorstoremodel without parens. 
+ return wrap(cls) + + +def _parse_signature_to_definition(parameters) -> VectorStoreRecordDefinition: + if len(parameters) == 0: + raise VectorStoreModelException( + "There must be at least one field in the datamodel. If you are using this with a @dataclass, " + "you might have inverted the order of the decorators, the vectorstoremodel decorator should be the top one." + ) + fields: dict[str, VectorStoreRecordField] = {} + for field in parameters.values(): + annotation = field.annotation + # check first if there are any annotations + if not hasattr(annotation, "__metadata__"): + if field._default is _empty: + raise VectorStoreModelException( + "Fields that do not have a VectorStoreRecord* annotation must have a default value." + ) + logger.info( + f'Field "{field.name}" does not have a VectorStoreRecord* ' + "annotation, will not be part of the record." + ) + continue + property_type = annotation.__origin__ + if (args := getattr(property_type, "__args__", None)) and NoneType in args and len(args) == 2: + property_type = args[0] + metadata = annotation.__metadata__ + field_type = None + for item in metadata: + if isinstance(item, VectorStoreRecordField): + field_type = item + if not field_type.name or field_type.name != field.name: + field_type.name = field.name + if not field_type.property_type: + if hasattr(property_type, "__args__"): + if isinstance(item, VectorStoreRecordVectorField): + field_type.property_type = property_type.__args__[0].__name__ + elif property_type.__name__ == "list": + field_type.property_type = f"{property_type.__name__}[{property_type.__args__[0].__name__}]" + else: + field_type.property_type = property_type.__name__ + + else: + field_type.property_type = property_type.__name__ + elif isinstance(item, type(VectorStoreRecordField)): + if hasattr(property_type, "__args__") and property_type.__name__ == "list": + property_type_name = f"{property_type.__name__}[{property_type.__args__[0].__name__}]" + else: + property_type_name = property_type.__name__ + field_type = item(name=field.name, property_type=property_type_name) + if not field_type: + if field._default is _empty: + raise VectorStoreModelException( + "Fields that do not have a VectorStoreRecord* annotation must have a default value." + ) + logger.debug( + f'Field "{field.name}" does not have a VectorStoreRecordField ' + "annotation, will not be part of the record." + ) + continue + # field name is set either when not None or by instantiating a new field + assert field_type.name is not None # nosec + fields[field_type.name] = field_type + return VectorStoreRecordDefinition(fields=fields) diff --git a/python/semantic_kernel/data/vector_store_model_definition.py b/python/semantic_kernel/data/vector_store_model_definition.py new file mode 100644 index 000000000000..b4e11bc78359 --- /dev/null +++ b/python/semantic_kernel/data/vector_store_model_definition.py @@ -0,0 +1,93 @@ +# Copyright (c) Microsoft. All rights reserved. + +from dataclasses import dataclass, field + +from semantic_kernel.data.vector_store_model_protocols import ( + DeserializeProtocol, + FromDictProtocol, + SerializeProtocol, + ToDictProtocol, +) +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) +from semantic_kernel.exceptions.memory_connector_exceptions import VectorStoreModelException + + +@dataclass +class VectorStoreRecordDefinition: + """Memory record definition. + + Args: + fields: The fields of the record. 
+ container_mode: Whether the record is in container mode. + to_dict: The to_dict function, should take a record and return a list of dicts. + from_dict: The from_dict function, should take a list of dicts and return a record. + serialize: The serialize function, should take a record and return the type specific to a datastore. + deserialize: The deserialize function, should take a type specific to a datastore and return a record. + + """ + + key_field_name: str = field(init=False) + fields: dict[str, VectorStoreRecordField] + container_mode: bool = False + to_dict: ToDictProtocol | None = None + from_dict: FromDictProtocol | None = None + serialize: SerializeProtocol | None = None + deserialize: DeserializeProtocol | None = None + + @property + def field_names(self) -> list[str]: + """Get the names of the fields.""" + return list(self.fields.keys()) + + @property + def key_field(self) -> "VectorStoreRecordKeyField": + """Get the key field.""" + return self.fields[self.key_field_name] # type: ignore + + @property + def vector_field_names(self) -> list[str]: + """Get the names of the vector fields.""" + return [name for name, value in self.fields.items() if isinstance(value, VectorStoreRecordVectorField)] + + @property + def vector_fields(self) -> list["VectorStoreRecordVectorField"]: + """Get the names of the vector fields.""" + return [field for field in self.fields.values() if isinstance(field, VectorStoreRecordVectorField)] + + def __post_init__(self): + """Validate the fields. + + Raises: + DataModelException: If a field does not have a name. + DataModelException: If there is a field with an embedding property name but no corresponding vector field. + DataModelException: If there is no key field. + """ + if len(self.fields) == 0: + raise VectorStoreModelException( + "There must be at least one field with a VectorStoreRecordField annotation." + ) + self.key_field_name = "" + for name, value in self.fields.items(): + if not name: + raise VectorStoreModelException("Fields must have a name.") + if value.name is None: + value.name = name + if ( + isinstance(value, VectorStoreRecordDataField) + and value.has_embedding + and value.embedding_property_name not in self.field_names + ): + raise VectorStoreModelException( + "Data field with embedding property name must refer to a existing vector field." + ) + if isinstance(value, VectorStoreRecordKeyField): + if self.key_field_name != "": + raise VectorStoreModelException("Memory record definition must have exactly one key field.") + self.key_field_name = name + if not self.key_field_name: + raise VectorStoreModelException("Memory record definition must have exactly one key field.") diff --git a/python/semantic_kernel/data/vector_store_model_protocols.py b/python/semantic_kernel/data/vector_store_model_protocols.py new file mode 100644 index 000000000000..18d5e5b9709c --- /dev/null +++ b/python/semantic_kernel/data/vector_store_model_protocols.py @@ -0,0 +1,114 @@ +# Copyright (c) Microsoft. All rights reserved. + +from collections.abc import Sequence +from typing import Any, Protocol, TypeVar, runtime_checkable + +TModel = TypeVar("TModel", bound=object) + + +@runtime_checkable +class VectorStoreModelFunctionSerdeProtocol(Protocol): + """Data model serialization and deserialization protocol. + + This can optionally be implemented to allow single step serialization and deserialization + for using your data model with a specific datastore. 
+ """ + + def serialize(self, **kwargs: Any) -> Any: + """Serialize the object to the format required by the data store.""" + ... # pragma: no cover + + @classmethod + def deserialize(cls: type[TModel], obj: Any, **kwargs: Any) -> TModel: + """Deserialize the output of the data store to an object.""" + ... # pragma: no cover + + +@runtime_checkable +class VectorStoreModelPydanticProtocol(Protocol): + """Class used internally to make sure a datamodel has model_dump and model_validate.""" + + def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]: + """Serialize the object to the format required by the data store.""" + ... # pragma: no cover + + @classmethod + def model_validate(cls: type[TModel], *args: Any, **kwargs: Any) -> TModel: + """Deserialize the output of the data store to an object.""" + ... # pragma: no cover + + +@runtime_checkable +class VectorStoreModelToDictFromDictProtocol(Protocol): + """Class used internally to check if a model has to_dict and from_dict methods.""" + + def to_dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]: + """Serialize the object to the format required by the data store.""" + ... # pragma: no cover + + @classmethod + def from_dict(cls: type[TModel], *args: Any, **kwargs: Any) -> TModel: + """Deserialize the output of the data store to an object.""" + ... # pragma: no cover + + +@runtime_checkable +class ToDictProtocol(Protocol): + """Protocol for to_dict method. + + Args: + record: The record to be serialized. + **kwargs: Additional keyword arguments. + + Returns: + A list of dictionaries. + """ + + def __call__(self, record: Any, **kwargs: Any) -> Sequence[dict[str, Any]]: ... # pragma: no cover # noqa: D102 + + +@runtime_checkable +class FromDictProtocol(Protocol): + """Protocol for from_dict method. + + Args: + records: A list of dictionaries. + **kwargs: Additional keyword arguments. + + Returns: + A record or list thereof. + """ + + def __call__(self, records: Sequence[dict[str, Any]], **kwargs: Any) -> Any: ... # noqa: D102 + + +@runtime_checkable +class SerializeProtocol(Protocol): + """Protocol for serialize method. + + Args: + record: The record to be serialized. + **kwargs: Additional keyword arguments. + + Returns: + The serialized record, ready to be consumed by the specific store. + + """ + + def __call__(self, record: Any, **kwargs: Any) -> Any: ... # noqa: D102 + + +@runtime_checkable +class DeserializeProtocol(Protocol): + """Protocol for deserialize method. + + Args: + records: The serialized record directly from the store. + **kwargs: Additional keyword arguments. + + Returns: + The deserialized record in the format expected by the application. + + """ + + def __call__(self, records: Any, **kwargs: Any) -> Any: ... # noqa: D102 diff --git a/python/semantic_kernel/data/vector_store_record_collection.py b/python/semantic_kernel/data/vector_store_record_collection.py new file mode 100644 index 000000000000..a3b886db9b11 --- /dev/null +++ b/python/semantic_kernel/data/vector_store_record_collection.py @@ -0,0 +1,560 @@ +# Copyright (c) Microsoft. All rights reserved. 
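# A sketch of a model that opts into the single-step serde protocol defined above; the
# class, its field arguments, and the payload shape are hypothetical. When a model (or
# its record definition) provides serialize/deserialize, the record collection uses
# them instead of the field-by-field dict path.
from typing import Annotated, Any

from semantic_kernel.data import VectorStoreRecordKeyField, VectorStoreRecordVectorField, vectorstoremodel


@vectorstoremodel
class StoreAwareRecord:
    def __init__(
        self,
        record_id: Annotated[str, VectorStoreRecordKeyField()],
        vector: Annotated[list[float] | None, VectorStoreRecordVectorField(dimensions=3)] = None,
    ) -> None:
        self.record_id = record_id
        self.vector = vector

    def serialize(self, **kwargs: Any) -> dict[str, Any]:
        # Shape the record exactly as the target store expects it.
        return {"record_id": self.record_id, "vector": self.vector}

    @classmethod
    def deserialize(cls, obj: dict[str, Any], **kwargs: Any) -> "StoreAwareRecord":
        return cls(record_id=obj["record_id"], vector=obj.get("vector"))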
+ +import asyncio +import contextlib +import logging +from abc import abstractmethod +from collections.abc import Awaitable, Callable, Mapping, Sequence +from typing import Any, ClassVar, Generic, TypeVar + +from pydantic import model_validator + +from semantic_kernel.data.vector_store_model_definition import ( + VectorStoreRecordDefinition, +) +from semantic_kernel.data.vector_store_model_protocols import ( + VectorStoreModelFunctionSerdeProtocol, + VectorStoreModelPydanticProtocol, + VectorStoreModelToDictFromDictProtocol, +) +from semantic_kernel.exceptions.memory_connector_exceptions import ( + MemoryConnectorException, + VectorStoreModelDeserializationException, + VectorStoreModelSerializationException, + VectorStoreModelValidationError, +) +from semantic_kernel.kernel_pydantic import KernelBaseModel +from semantic_kernel.kernel_types import OneOrMany +from semantic_kernel.utils.experimental_decorator import experimental_class + +TModel = TypeVar("TModel", bound=object) +TKey = TypeVar("TKey") +_T = TypeVar("_T", bound="VectorStoreRecordCollection") + +logger = logging.getLogger(__name__) + + +@experimental_class +class VectorStoreRecordCollection(KernelBaseModel, Generic[TKey, TModel]): + """Base class for a vector store record collection.""" + + collection_name: str + data_model_type: type[TModel] + data_model_definition: VectorStoreRecordDefinition + supported_key_types: ClassVar[list[str] | None] = None + supported_vector_types: ClassVar[list[str] | None] = None + + @property + def _container_mode(self) -> bool: + return self.data_model_definition.container_mode + + @property + def _key_field_name(self) -> str: + return self.data_model_definition.key_field_name + + @model_validator(mode="before") + @classmethod + def _ensure_data_model_definition(cls: type[_T], data: dict[str, Any]) -> dict[str, Any]: + """Ensure there is a data model definition, if it isn't passed, try to get it from the data model type.""" + if not data.get("data_model_definition"): + data["data_model_definition"] = getattr( + data["data_model_type"], "__kernel_vectorstoremodel_definition__", None + ) + return data + + def model_post_init(self, __context: object | None = None): + """Post init function that sets the key field and container mode values, and validates the datamodel.""" + self._validate_data_model() + + # region Overload Methods + async def close(self): + """Close the connection.""" + return # pragma: no cover + + @abstractmethod + async def _inner_upsert( + self, + records: Sequence[Any], + **kwargs: Any, + ) -> Sequence[TKey]: + """Upsert the records, this should be overridden by the child class. + + Args: + records (Sequence[Any]): The records, the format is specific to the store. + **kwargs (Any): Additional arguments, to be passed to the store. + + Returns: + The keys of the upserted records. + """ + ... # pragma: no cover + + @abstractmethod + async def _inner_get(self, keys: Sequence[TKey], **kwargs: Any) -> OneOrMany[Any] | None: + """Get the records, this should be overridden by the child class. + + Args: + keys (Sequence[TKey]): The keys to get. + **kwargs (Any): Additional arguments. + + Returns: + The records from the store, not deserialized. + """ + ... # pragma: no cover + + @abstractmethod + async def _inner_delete(self, keys: Sequence[TKey], **kwargs: Any) -> None: + """Delete the records, this should be overridden by the child class. + + Args: + keys (Sequence[TKey]): The keys. + **kwargs (Any): Additional arguments. + """ + ... 
# pragma: no cover + + def _validate_data_model(self): + """Internal function that can be overloaded by child classes to validate datatypes, etc. + + This should take the VectorStoreRecordDefinition from the item_type and validate it against the store. + + Checks can include, allowed naming of parameters, allowed data types, allowed vector dimensions. + + Default checks are that the key field is in the allowed key types and the vector fields + are in the allowed vector types. + + Raises: + VectorStoreModelValidationError: If the key field is not in the allowed key types. + VectorStoreModelValidationError: If the vector fields are not in the allowed vector types. + + """ + if ( + self.supported_key_types + and self.data_model_definition.key_field.property_type + and self.data_model_definition.key_field.property_type not in self.supported_key_types + ): + raise VectorStoreModelValidationError( + f"Key field must be one of {self.supported_key_types}, " + f"got {self.data_model_definition.key_field.property_type}" + ) + if not self.supported_vector_types: + return + for field in self.data_model_definition.vector_fields: + if field.property_type and field.property_type not in self.supported_vector_types: + raise VectorStoreModelValidationError( + f"Vector field {field.name} must be one of {self.supported_vector_types}, got {field.property_type}" + ) + + @abstractmethod + def _serialize_dicts_to_store_models(self, records: Sequence[dict[str, Any]], **kwargs: Any) -> Sequence[Any]: + """Serialize a list of dicts of the data to the store model. + + This method should be overridden by the child class to convert the dict to the store model. + """ + ... # pragma: no cover + + @abstractmethod + def _deserialize_store_models_to_dicts(self, records: Sequence[Any], **kwargs: Any) -> Sequence[dict[str, Any]]: + """Deserialize the store models to a list of dicts. + + This method should be overridden by the child class to convert the store model to a list of dicts. + """ + ... # pragma: no cover + + async def create_collection_if_not_exists(self, **kwargs: Any) -> bool: + """Create the collection in the service if it does not exists. + + First uses does_collection_exist to check if it exists, if it does returns False. + Otherwise, creates the collection and returns True. + + """ + if await self.does_collection_exist(**kwargs): + return False + await self.create_collection(**kwargs) + return True + + @abstractmethod + async def create_collection(self, **kwargs: Any) -> None: + """Create the collection in the service.""" + ... # pragma: no cover + + @abstractmethod + async def does_collection_exist(self, **kwargs: Any) -> bool: + """Check if the collection exists.""" + ... # pragma: no cover + + @abstractmethod + async def delete_collection(self, **kwargs: Any) -> None: + """Delete the collection.""" + ... # pragma: no cover + + # region Public Methods + + async def upsert( + self, + record: TModel, + embedding_generation_function: Callable[ + [TModel, type[TModel] | None, VectorStoreRecordDefinition | None], Awaitable[TModel] + ] + | None = None, + **kwargs: Any, + ) -> OneOrMany[TKey] | None: + """Upsert a record. + + Args: + record (TModel): The record. + embedding_generation_function (Callable): Supply this function to generate embeddings. + This will be called with the data model definition and the records, + should return the records with vectors. + This can be supplied by using the add_vector_to_records method from the VectorStoreRecordUtils. + **kwargs (Any): Additional arguments. 
+ + Returns: + The key of the upserted record or a list of keys, when a container type is used. + """ + if embedding_generation_function: + record = await embedding_generation_function(record, self.data_model_type, self.data_model_definition) + + try: + data = self.serialize(record) + except Exception as exc: + raise MemoryConnectorException(f"Error serializing record: {exc}") from exc + + try: + results = await self._inner_upsert(data if isinstance(data, Sequence) else [data], **kwargs) + except Exception as exc: + raise MemoryConnectorException(f"Error upserting record: {exc}") from exc + + if self._container_mode: + return results + return results[0] + + async def upsert_batch( + self, + records: OneOrMany[TModel], + embedding_generation_function: Callable[ + [OneOrMany[TModel], type[TModel] | None, VectorStoreRecordDefinition | None], Awaitable[OneOrMany[TModel]] + ] + | None = None, + **kwargs: Any, + ) -> Sequence[TKey]: + """Upsert a batch of records. + + Args: + records (Sequence[TModel] | TModel): The records to upsert, can be a list of records, or a single container. + embedding_generation_function (Callable): Supply this function to generate embeddings. + This will be called with the data model definition and the records, + should return the records with vectors. + This can be supplied by using the add_vector_to_records method from the VectorStoreRecordUtils. + **kwargs (Any): Additional arguments. + + Returns: + Sequence[TKey]: The keys of the upserted records, this is always a list, + corresponds to the input or the items in the container. + """ + if embedding_generation_function: + records = await embedding_generation_function(records, self.data_model_type, self.data_model_definition) + + try: + data = self.serialize(records) + except Exception as exc: + raise MemoryConnectorException(f"Error serializing records: {exc}") from exc + + try: + return await self._inner_upsert(data, **kwargs) # type: ignore + except Exception as exc: + raise MemoryConnectorException(f"Error upserting records: {exc}") from exc + + async def get(self, key: TKey, **kwargs: Any) -> TModel | None: + """Get a record. + + Args: + key (TKey): The key. + **kwargs (Any): Additional arguments. + + Returns: + TModel: The record. + """ + try: + records = await self._inner_get([key]) + except Exception as exc: + raise MemoryConnectorException(f"Error getting record: {exc}") from exc + + if not records: + return None + + try: + model_records = self.deserialize(records[0], keys=[key], **kwargs) + except Exception as exc: + raise MemoryConnectorException(f"Error deserializing record: {exc}") from exc + + # there are many code paths within the deserialize method, some supplied by the developer, + # and so depending on what is used, + # it might return a sequence, so we just return the first element, + # there should never be multiple elements (this is not a batch get), + # hence a raise if there are. + if not isinstance(model_records, Sequence): + return model_records + if len(model_records) == 1: + return model_records[0] + raise MemoryConnectorException(f"Error deserializing record, multiple records returned: {model_records}") + + async def get_batch(self, keys: Sequence[TKey], **kwargs: Any) -> OneOrMany[TModel] | None: + """Get a batch of records. + + Args: + keys (Sequence[TKey]): The keys. + **kwargs (Any): Additional arguments. + + Returns: + The records, either a list of TModel or the container type. 
+ """ + try: + records = await self._inner_get(keys) + except Exception as exc: + raise MemoryConnectorException(f"Error getting records: {exc}") from exc + + if not records: + return None + + try: + return self.deserialize(records, keys=keys, **kwargs) + except Exception as exc: + raise MemoryConnectorException(f"Error deserializing record: {exc}") from exc + + async def delete(self, key: TKey, **kwargs: Any) -> None: + """Delete a record. + + Args: + key (TKey): The key. + **kwargs (Any): Additional arguments. + + """ + try: + await self._inner_delete([key], **kwargs) + except Exception as exc: + raise MemoryConnectorException(f"Error deleting record: {exc}") from exc + + async def delete_batch(self, keys: Sequence[TKey], **kwargs: Any) -> None: + """Delete a batch of records. + + Args: + keys (Sequence[TKey]): The keys. + **kwargs (Any): Additional arguments. + + """ + try: + await self._inner_delete(keys, **kwargs) + except Exception as exc: + raise MemoryConnectorException(f"Error deleting records: {exc}") from exc + + # region Internal Serialization methods + + def serialize(self, records: OneOrMany[TModel], **kwargs: Any) -> OneOrMany[Any]: + """Serialize the data model to the store model. + + This method follows the following steps: + 1. Check if the data model has a serialize method. + Use that method to serialize and return the result. + 2. Serialize the records into a dict, using the data model specific method. + 3. Convert the dict to the store model, using the store specific method. + + If overriding this method, make sure to first try to serialize the data model to the store model, + before doing the store specific version, + the user supplied version should have precedence. + """ + if serialized := self._serialize_data_model_to_store_model(records): + return serialized + + if isinstance(records, Sequence): + dict_records = [self._serialize_data_model_to_dict(rec) for rec in records] + return self._serialize_dicts_to_store_models(dict_records, **kwargs) # type: ignore + + dict_records = self._serialize_data_model_to_dict(records) # type: ignore + if isinstance(dict_records, Sequence): + # most likely this is a container, so we return all records as a list + # can also be a single record, but the to_dict returns a list + # hence we will treat it as a container. + return self._serialize_dicts_to_store_models(dict_records, **kwargs) # type: ignore + # this case is single record in, single record out + return self._serialize_dicts_to_store_models([dict_records], **kwargs)[0] + + def deserialize(self, records: OneOrMany[Any | dict[str, Any]], **kwargs: Any) -> OneOrMany[TModel] | None: + """Deserialize the store model to the data model. + + This method follows the following steps: + 1. Check if the data model has a deserialize method. + Use that method to deserialize and return the result. + 2. Deserialize the store model to a dict, using the store specific method. + 3. Convert the dict to the data model, using the data model specific method. 
+ """ + if deserialized := self._deserialize_store_model_to_data_model(records, **kwargs): + return deserialized + + if isinstance(records, Sequence): + dict_records = self._deserialize_store_models_to_dicts(records, **kwargs) + if self._container_mode: + return self._deserialize_dict_to_data_model(dict_records, **kwargs) + return [self._deserialize_dict_to_data_model(rec, **kwargs) for rec in dict_records] + + dict_record = self._deserialize_store_models_to_dicts([records], **kwargs)[0] + if not dict_record: + return None + return self._deserialize_dict_to_data_model(dict_record, **kwargs) + + def _serialize_data_model_to_store_model(self, record: OneOrMany[TModel], **kwargs: Any) -> OneOrMany[Any] | None: + """Serialize the data model to the store model. + + This works when the data model has supplied a serialize method, specific to a data source. + This is a method called 'serialize()' on the data model or part of the vector store record definition. + + The developer is responsible for correctly serializing for the specific data source. + """ + if isinstance(record, Sequence): + result = [self._serialize_data_model_to_store_model(rec, **kwargs) for rec in record] + if not all(result): + return None + return result + if self.data_model_definition.serialize: + return self.data_model_definition.serialize(record, **kwargs) # type: ignore + if isinstance(record, VectorStoreModelFunctionSerdeProtocol): + try: + return record.serialize(**kwargs) + except Exception as exc: + raise VectorStoreModelSerializationException(f"Error serializing record: {exc}") from exc + return None + + def _deserialize_store_model_to_data_model(self, record: OneOrMany[Any], **kwargs: Any) -> OneOrMany[TModel] | None: + """Deserialize the store model to the data model. + + This works when the data model has supplied a deserialize method, specific to a data source. + This uses a method called 'deserialize()' on the data model or part of the vector store record definition. + + The developer is responsible for correctly deserializing for the specific data source. + """ + if self.data_model_definition.deserialize: + if isinstance(record, Sequence): + return self.data_model_definition.deserialize(record, **kwargs) + return self.data_model_definition.deserialize([record], **kwargs) + if isinstance(self.data_model_type, VectorStoreModelFunctionSerdeProtocol): + try: + if isinstance(record, Sequence): + return [self.data_model_type.deserialize(rec, **kwargs) for rec in record] + return self.data_model_type.deserialize(record, **kwargs) + except Exception as exc: + raise VectorStoreModelSerializationException(f"Error deserializing record: {exc}") from exc + return None + + def _serialize_data_model_to_dict(self, record: TModel, **kwargs: Any) -> OneOrMany[dict[str, Any]]: + """This function is used if no serialize method is found on the data model. + + This will generally serialize the data model to a dict, should not be overridden by child classes. + + The output of this should be passed to the serialize_dict_to_store_model method. 
+ """ + if self.data_model_definition.to_dict: + return self.data_model_definition.to_dict(record, **kwargs) + if isinstance(record, VectorStoreModelPydanticProtocol): + try: + ret = record.model_dump() + if not any(field.serialize_function is not None for field in self.data_model_definition.vector_fields): + return ret + for field in self.data_model_definition.vector_fields: + if field.serialize_function: + assert field.name is not None # nosec + ret[field.name] = field.serialize_function(ret[field.name]) + return ret + except Exception as exc: + raise VectorStoreModelSerializationException(f"Error serializing record: {exc}") from exc + if isinstance(record, VectorStoreModelToDictFromDictProtocol): + try: + ret = record.to_dict() + if not any(field.serialize_function is not None for field in self.data_model_definition.vector_fields): + return ret + for field in self.data_model_definition.vector_fields: + if field.serialize_function: + assert field.name is not None # nosec + ret[field.name] = field.serialize_function(ret[field.name]) + return ret + except Exception as exc: + raise VectorStoreModelSerializationException(f"Error serializing record: {exc}") from exc + + store_model = {} + for field_name in self.data_model_definition.field_names: + try: + value = record[field_name] if isinstance(record, Mapping) else getattr(record, field_name) + if func := getattr(self.data_model_definition.fields[field_name], "serialize_function", None): + value = func(value) + store_model[field_name] = value + except (AttributeError, KeyError) as exc: + raise VectorStoreModelSerializationException( + f"Error serializing record, not able to get: {field_name}" + ) from exc + return store_model + + def _deserialize_dict_to_data_model(self, record: OneOrMany[dict[str, Any]], **kwargs: Any) -> TModel: + """This function is used if no deserialize method is found on the data model. + + This method is the second step and will deserialize a dict to the data model, + should not be overridden by child classes. + + The input of this should come from the _deserialized_store_model_to_dict function. + """ + if self.data_model_definition.from_dict: + if isinstance(record, Sequence): + return self.data_model_definition.from_dict(record, **kwargs) + ret = self.data_model_definition.from_dict([record], **kwargs) + return ret if self._container_mode else ret[0] + if isinstance(record, Sequence): + if len(record) > 1: + raise VectorStoreModelDeserializationException( + "Cannot deserialize multiple records to a single record unless you are using a container." 
+ ) + record = record[0] + if isinstance(self.data_model_type, VectorStoreModelPydanticProtocol): + try: + if not any(field.serialize_function is not None for field in self.data_model_definition.vector_fields): + return self.data_model_type.model_validate(record) + for field in self.data_model_definition.vector_fields: + if field.serialize_function: + record[field.name] = field.serialize_function(record[field.name]) + return self.data_model_type.model_validate(record) + except Exception as exc: + raise VectorStoreModelDeserializationException(f"Error deserializing record: {exc}") from exc + if isinstance(self.data_model_type, VectorStoreModelToDictFromDictProtocol): + try: + if not any(field.serialize_function is not None for field in self.data_model_definition.vector_fields): + return self.data_model_type.from_dict(record) + for field in self.data_model_definition.vector_fields: + if field.serialize_function: + record[field.name] = field.serialize_function(record[field.name]) + return self.data_model_type.from_dict(record) + except Exception as exc: + raise VectorStoreModelDeserializationException(f"Error deserializing record: {exc}") from exc + data_model_dict: dict[str, Any] = {} + for field_name in self.data_model_definition.fields: # type: ignore + try: + value = record[field_name] + if func := getattr(self.data_model_definition.fields[field_name], "deserialize_function", None): + value = func(value) + data_model_dict[field_name] = value + except KeyError as exc: + raise VectorStoreModelDeserializationException( + f"Error deserializing record, not able to get: {field_name}" + ) from exc + if self.data_model_type is dict: + return data_model_dict # type: ignore + return self.data_model_type(**data_model_dict) + + # region Internal Functions + + async def __aenter__(self): + """Enter the context manager.""" + return self + + async def __aexit__(self, *args): + """Exit the context manager.""" + await self.close() + + def __del__(self): + """Delete the instance.""" + with contextlib.suppress(Exception): + asyncio.get_running_loop().create_task(self.close()) diff --git a/python/semantic_kernel/data/vector_store_record_fields.py b/python/semantic_kernel/data/vector_store_record_fields.py new file mode 100644 index 000000000000..5f01be7022f4 --- /dev/null +++ b/python/semantic_kernel/data/vector_store_record_fields.py @@ -0,0 +1,81 @@ +# Copyright (c) Microsoft. All rights reserved. + +from abc import ABC +from collections.abc import Callable +from typing import Any + +from pydantic import Field +from pydantic.dataclasses import dataclass + +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.data.const import DistanceFunction, IndexKind + + +@dataclass +class VectorStoreRecordField(ABC): + """Base class for all Vector Store Record Fields.""" + + name: str | None = None + property_type: str | None = None + + +@dataclass +class VectorStoreRecordKeyField(VectorStoreRecordField): + """Memory record key field.""" + + +@dataclass +class VectorStoreRecordDataField(VectorStoreRecordField): + """Memory record data field.""" + + has_embedding: bool = False + embedding_property_name: str | None = None + is_filterable: bool | None = None + is_full_text_searchable: bool | None = None + + +@dataclass +class VectorStoreRecordVectorField(VectorStoreRecordField): + """Memory record vector field. + + Most vectors stores use a `list[float]` as the data type for vectors. + This is the default and all vector stores in SK use this internally. 
+ But in your class you may want to use a numpy array or some other optimized type, + in order to support that, + you can set the deserialize_function to a function that takes a list of floats and returns the optimized type, + and then also supply a serialize_function that takes the optimized type and returns a list of floats. + + For instance for numpy, that would be `serialize_function=np.ndarray.tolist` and `deserialize_function=np.array`, + (with `import numpy as np` at the top of your file). + if you want to set it up with more specific options, use a lambda, a custom function or a partial. + + Args: + property_type (str, optional): Property type. + For vectors this should be the inner type of the vector. + By default the vector will be a list of numbers. + If you want to use a numpy array or some other optimized format, + set the cast_function with a function + that takes a list of floats and returns a numpy array. + local_embedding (bool, optional): Whether to embed the vector locally. Defaults to True. + embedding_settings (dict[str, PromptExecutionSettings], optional): Embedding settings. + The key is the name of the embedding service to use, can be multiple ones. + serialize_function (Callable[[Any], list[float | int]], optional): Serialize function, + should take the vector and return a list of numbers. + deserialize_function (Callable[[list[float | int]], Any], optional): Deserialize function, + should take a list of numbers and return the vector. + """ + + local_embedding: bool = True + dimensions: int | None = None + index_kind: IndexKind | None = None + distance_function: DistanceFunction | None = None + embedding_settings: dict[str, PromptExecutionSettings] = Field(default_factory=dict) + serialize_function: Callable[[Any], list[float | int]] | None = None + deserialize_function: Callable[[list[float | int]], Any] | None = None + + +__all__ = [ + "VectorStoreRecordDataField", + "VectorStoreRecordKeyField", + "VectorStoreRecordVectorField", +] diff --git a/python/semantic_kernel/data/vector_store_record_utils.py b/python/semantic_kernel/data/vector_store_record_utils.py new file mode 100644 index 000000000000..665605ddc630 --- /dev/null +++ b/python/semantic_kernel/data/vector_store_record_utils.py @@ -0,0 +1,83 @@ +# Copyright (c) Microsoft. All rights reserved. + + +from collections.abc import Callable +from typing import TYPE_CHECKING, TypeVar + +from semantic_kernel.data.vector_store_record_fields import VectorStoreRecordDataField, VectorStoreRecordVectorField +from semantic_kernel.exceptions.memory_connector_exceptions import VectorStoreModelException +from semantic_kernel.kernel_types import OneOrMany + +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition + from semantic_kernel.kernel import Kernel + +TModel = TypeVar("TModel", bound=object) + + +class VectorStoreRecordUtils: + """Helper class to easily add embeddings to a (set of) vector store record.""" + + def __init__(self, kernel: "Kernel"): + """Initializes the VectorStoreRecordUtils with a kernel.""" + self.kernel = kernel + + async def add_vector_to_records( + self, + records: OneOrMany[TModel], + data_model_type: type[TModel] | None = None, + data_model_definition: "VectorStoreRecordDefinition | None" = None, + **kwargs, + ) -> OneOrMany[TModel]: + """Vectorize the vector record. 
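A minimal sketch of the numpy pattern described in the docstring above, closely following the data models used in this PR's test fixtures; the class and field names are illustrative.

```python
# Store the vector as an np.ndarray on the model while persisting it as a plain list of floats.
from dataclasses import dataclass, field
from typing import Annotated
from uuid import uuid4

import numpy as np

from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel
from semantic_kernel.data.vector_store_record_fields import (
    VectorStoreRecordDataField,
    VectorStoreRecordKeyField,
    VectorStoreRecordVectorField,
)


@vectorstoremodel
@dataclass
class NumpyRecord:
    vector: Annotated[
        np.ndarray | None,
        VectorStoreRecordVectorField(
            dimensions=5,
            property_type="float",
            serialize_function=np.ndarray.tolist,  # ndarray -> list[float] before writing to the store
            deserialize_function=np.array,  # list[float] -> ndarray when reading back
        ),
    ] = None
    id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
    content: Annotated[
        str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")
    ] = "some text"
```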
+ + This function can be passed to upsert or upsert batch of a VectorStoreRecordCollection. + + Loops through the fields of the data model definition, + looks at data fields, if they have a vector field, + looks up that vector field and checks if is a local embedding. + + If so adds that to a list of embeddings to make. + + Finally calls Kernel add_embedding_to_object with the list of embeddings to make. + + Optional arguments are passed onto the Kernel add_embedding_to_object call. + """ + # dict of embedding_field.name and tuple of record, settings, field_name + embeddings_to_make: list[tuple[str, str, dict[str, "PromptExecutionSettings"], Callable | None]] = [] + if not data_model_definition: + data_model_definition = getattr(data_model_type, "__kernel_vectorstoremodel_definition__", None) + if not data_model_definition: + raise VectorStoreModelException( + "Data model definition is required, either directly or from the data model type." + ) + for name, field in data_model_definition.fields.items(): # type: ignore + if ( + not isinstance(field, VectorStoreRecordDataField) + or not field.has_embedding + or not field.embedding_property_name + ): + continue + embedding_field = data_model_definition.fields.get(field.embedding_property_name) + if not isinstance(embedding_field, VectorStoreRecordVectorField): + raise VectorStoreModelException("Embedding field must be a VectorStoreRecordVectorField") + if embedding_field.local_embedding: + embeddings_to_make.append(( + name, + field.embedding_property_name, + embedding_field.embedding_settings, + embedding_field.deserialize_function, + )) + + for field_to_embed, field_to_store, settings, cast_callable in embeddings_to_make: + await self.kernel.add_embedding_to_object( + inputs=records, + field_to_embed=field_to_embed, + field_to_store=field_to_store, + execution_settings=settings, + container_mode=data_model_definition.container_mode, + cast_function=cast_callable, + **kwargs, + ) + return records diff --git a/python/semantic_kernel/exceptions/memory_connector_exceptions.py b/python/semantic_kernel/exceptions/memory_connector_exceptions.py index e2ca9856258c..0a94503aa414 100644 --- a/python/semantic_kernel/exceptions/memory_connector_exceptions.py +++ b/python/semantic_kernel/exceptions/memory_connector_exceptions.py @@ -10,6 +10,24 @@ class MemoryConnectorException(KernelException): pass +class VectorStoreModelException(MemoryConnectorException): + """Base class for all vector store model exceptions.""" + + pass + + +class VectorStoreModelSerializationException(VectorStoreModelException): + """An error occurred while serializing the vector store model.""" + + pass + + +class VectorStoreModelDeserializationException(VectorStoreModelException): + """An error occurred while deserializing the vector store model.""" + + pass + + class MemoryConnectorInitializationError(MemoryConnectorException): """An error occurred while initializing the memory connector.""" @@ -22,8 +40,18 @@ class MemoryConnectorResourceNotFound(MemoryConnectorException): pass +class VectorStoreModelValidationError(VectorStoreModelException): + """An error occurred while validating the vector store model.""" + + pass + + __all__ = [ "MemoryConnectorException", "MemoryConnectorInitializationError", "MemoryConnectorResourceNotFound", + "VectorStoreModelDeserializationException", + "VectorStoreModelException", + "VectorStoreModelSerializationException", + "VectorStoreModelValidationError", ] diff --git a/python/semantic_kernel/kernel.py b/python/semantic_kernel/kernel.py index 
c2a1cee615e3..7d0a989068e1 100644 --- a/python/semantic_kernel/kernel.py +++ b/python/semantic_kernel/kernel.py @@ -1,10 +1,11 @@ # Copyright (c) Microsoft. All rights reserved. import logging -from collections.abc import AsyncGenerator, AsyncIterable +from collections.abc import AsyncGenerator, AsyncIterable, Callable from copy import copy from typing import TYPE_CHECKING, Any, Literal, TypeVar +from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase from semantic_kernel.const import METADATA_EXCEPTION_KEY from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.function_call_content import FunctionCallContent @@ -18,6 +19,7 @@ OperationCancelledException, TemplateSyntaxError, ) +from semantic_kernel.exceptions.kernel_exceptions import KernelServiceNotFoundError from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import ( AutoFunctionInvocationContext, ) @@ -31,7 +33,7 @@ from semantic_kernel.functions.kernel_function_extension import KernelFunctionExtension from semantic_kernel.functions.kernel_function_from_prompt import KernelFunctionFromPrompt from semantic_kernel.functions.kernel_plugin import KernelPlugin -from semantic_kernel.kernel_types import AI_SERVICE_CLIENT_TYPE +from semantic_kernel.kernel_types import AI_SERVICE_CLIENT_TYPE, OneOrMany from semantic_kernel.prompt_template.const import KERNEL_TEMPLATE_FORMAT_NAME from semantic_kernel.reliability.kernel_reliability_extension import KernelReliabilityExtension from semantic_kernel.services.ai_service_selector import AIServiceSelector @@ -42,10 +44,12 @@ from semantic_kernel.connectors.ai.function_choice_behavior import ( FunctionChoiceBehavior, ) + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.functions.kernel_function import KernelFunction T = TypeVar("T") +TDataModel = TypeVar("TDataModel") logger: logging.Logger = logging.getLogger(__name__) @@ -422,3 +426,59 @@ async def _inner_auto_function_invoke_handler(self, context: AutoFunctionInvocat else: context.function_result = FunctionResult(function=context.function.metadata, value=value) return + + async def add_embedding_to_object( + self, + inputs: OneOrMany[TDataModel], + field_to_embed: str, + field_to_store: str, + execution_settings: dict[str, "PromptExecutionSettings"], + container_mode: bool = False, + cast_function: Callable[[list[float]], Any] | None = None, + **kwargs: Any, + ): + """Gather all fields to embed, batch the embedding generation and store.""" + contents: list[Any] = [] + dict_like = (getter := getattr(inputs, "get", False)) and callable(getter) + list_of_dicts: bool = False + if container_mode: + contents = inputs[field_to_embed].tolist() # type: ignore + elif isinstance(inputs, list): + list_of_dicts = (getter := getattr(inputs[0], "get", False)) and callable(getter) + for record in inputs: + if list_of_dicts: + contents.append(record.get(field_to_embed)) # type: ignore + else: + contents.append(getattr(record, field_to_embed)) + else: + if dict_like: + contents.append(inputs.get(field_to_embed)) # type: ignore + else: + contents.append(getattr(inputs, field_to_embed)) + vectors = None + service: EmbeddingGeneratorBase | None = None + for service_id, settings in execution_settings.items(): + service = self.get_service(service_id, type=EmbeddingGeneratorBase) # type: ignore + if service: + vectors = await service.generate_raw_embeddings(texts=contents, settings=settings, 
**kwargs) # type: ignore + break + if not service: + raise KernelServiceNotFoundError("No service found to generate embeddings.") + if vectors is None: + raise KernelInvokeException("No vectors were generated.") + if cast_function: + vectors = [cast_function(vector) for vector in vectors] + if container_mode: + inputs[field_to_store] = vectors # type: ignore + return + if isinstance(inputs, list): + for record, vector in zip(inputs, vectors): + if list_of_dicts: + record[field_to_store] = vector # type: ignore + else: + setattr(record, field_to_store, vector) + return + if dict_like: + inputs[field_to_store] = vectors[0] # type: ignore + return + setattr(inputs, field_to_store, vectors[0]) diff --git a/python/semantic_kernel/kernel_pydantic.py b/python/semantic_kernel/kernel_pydantic.py index e2bedb1c8f3f..0547f5d73b3a 100644 --- a/python/semantic_kernel/kernel_pydantic.py +++ b/python/semantic_kernel/kernel_pydantic.py @@ -3,7 +3,7 @@ from typing import Annotated, Any, ClassVar, TypeVar -from pydantic import BaseModel, ConfigDict, UrlConstraints +from pydantic import BaseModel, ConfigDict, Field, UrlConstraints from pydantic.networks import Url from pydantic_settings import BaseSettings, SettingsConfigDict @@ -35,8 +35,8 @@ class KernelBaseSettings(BaseSettings): """ env_prefix: ClassVar[str] = "" - env_file_path: str | None = None - env_file_encoding: str = "utf-8" + env_file_path: str | None = Field(None, exclude=True) + env_file_encoding: str = Field("utf-8", exclude=True) model_config = SettingsConfigDict( extra="ignore", diff --git a/python/semantic_kernel/kernel_types.py b/python/semantic_kernel/kernel_types.py index b94e97765d39..5bbfdb5fe3d6 100644 --- a/python/semantic_kernel/kernel_types.py +++ b/python/semantic_kernel/kernel_types.py @@ -1,7 +1,15 @@ # Copyright (c) Microsoft. All rights reserved. -from typing import TypeVar +from collections.abc import Sequence +from typing import TypeVar, Union from semantic_kernel.services.ai_service_client_base import AIServiceClientBase AI_SERVICE_CLIENT_TYPE = TypeVar("AI_SERVICE_CLIENT_TYPE", bound=AIServiceClientBase) + +T = TypeVar("T") + +OneOrMany = Union[T, Sequence[T]] +OptionalOneOrMany = Union[None, T, Sequence[T]] + +__all__ = ["AI_SERVICE_CLIENT_TYPE", "OneOrMany", "OptionalOneOrMany"] diff --git a/python/semantic_kernel/services/ai_service_client_base.py b/python/semantic_kernel/services/ai_service_client_base.py index 7eadc8d5f52b..2f3b1ff22fdb 100644 --- a/python/semantic_kernel/services/ai_service_client_base.py +++ b/python/semantic_kernel/services/ai_service_client_base.py @@ -1,13 +1,15 @@ # Copyright (c) Microsoft. All rights reserved. from abc import ABC -from typing import Annotated +from typing import TYPE_CHECKING, Annotated from pydantic import Field, StringConstraints -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.kernel_pydantic import KernelBaseModel +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + class AIServiceClientBase(KernelBaseModel, ABC): """Base class for all AI Services. @@ -30,18 +32,22 @@ def model_post_init(self, __context: object | None = None): # Override this in subclass to return the proper prompt execution type the # service is expecting. 
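The new `Kernel.add_embedding_to_object` shown above batches the texts to embed and writes the vectors back onto the inputs. The sketch below (not part of this diff) calls it directly on a list of dicts; the "default" service id and the OpenAI settings are illustrative assumptions, and an embedding service must already be registered on the kernel under that id.

```python
# Hedged sketch of calling Kernel.add_embedding_to_object on a list of dict records.
import asyncio

from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import (
    OpenAIEmbeddingPromptExecutionSettings,
)
from semantic_kernel.kernel import Kernel


async def main() -> None:
    kernel = Kernel()
    # Assumption: kernel.add_service(<EmbeddingGeneratorBase with service_id="default">) was called.
    records = [
        {"id": "1", "content": "first document"},
        {"id": "2", "content": "second document"},
    ]
    await kernel.add_embedding_to_object(
        inputs=records,
        field_to_embed="content",
        field_to_store="vector",
        execution_settings={"default": OpenAIEmbeddingPromptExecutionSettings(dimensions=1536)},
    )
    # Each dict now carries a "vector" entry with the generated embedding.
    print(records)


asyncio.run(main())
```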
- def get_prompt_execution_settings_class(self) -> type[PromptExecutionSettings]: + def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]: """Get the request settings class.""" + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + return PromptExecutionSettings - def instantiate_prompt_execution_settings(self, **kwargs) -> PromptExecutionSettings: + def instantiate_prompt_execution_settings(self, **kwargs) -> "PromptExecutionSettings": """Create a request settings object. All arguments are passed to the constructor of the request settings object. """ return self.get_prompt_execution_settings_class()(**kwargs) - def get_prompt_execution_settings_from_settings(self, settings: PromptExecutionSettings) -> PromptExecutionSettings: + def get_prompt_execution_settings_from_settings( + self, settings: "PromptExecutionSettings" + ) -> "PromptExecutionSettings": """Get the request settings from a settings object.""" prompt_execution_settings_type = self.get_prompt_execution_settings_class() if isinstance(settings, prompt_execution_settings_type): diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 41575b36e337..b8e89896715a 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -1,13 +1,10 @@ # Copyright (c) Microsoft. All rights reserved. -import warnings from collections.abc import Callable from typing import TYPE_CHECKING from unittest.mock import MagicMock -import pytest - -from semantic_kernel.contents.function_call_content import FunctionCallContent +from pytest import fixture if TYPE_CHECKING: from semantic_kernel.contents.chat_history import ChatHistory @@ -17,40 +14,40 @@ from semantic_kernel.services.ai_service_client_base import AIServiceClientBase -@pytest.fixture(scope="function") +@fixture(scope="function") def kernel() -> "Kernel": from semantic_kernel.kernel import Kernel return Kernel() -@pytest.fixture(scope="session") +@fixture(scope="session") def service() -> "AIServiceClientBase": from semantic_kernel.services.ai_service_client_base import AIServiceClientBase return AIServiceClientBase(service_id="service", ai_model_id="ai_model_id") -@pytest.fixture(scope="session") +@fixture(scope="session") def default_service() -> "AIServiceClientBase": from semantic_kernel.services.ai_service_client_base import AIServiceClientBase return AIServiceClientBase(service_id="default", ai_model_id="ai_model_id") -@pytest.fixture(scope="function") +@fixture(scope="function") def kernel_with_service(kernel: "Kernel", service: "AIServiceClientBase") -> "Kernel": kernel.add_service(service) return kernel -@pytest.fixture(scope="function") +@fixture(scope="function") def kernel_with_default_service(kernel: "Kernel", default_service: "AIServiceClientBase") -> "Kernel": kernel.add_service(default_service) return kernel -@pytest.fixture(scope="session") +@fixture(scope="session") def not_decorated_native_function() -> Callable: def not_decorated_native_function(arg1: str) -> str: return "test" @@ -58,7 +55,7 @@ def not_decorated_native_function(arg1: str) -> str: return not_decorated_native_function -@pytest.fixture(scope="session") +@fixture(scope="session") def decorated_native_function() -> Callable: from semantic_kernel.functions.kernel_function_decorator import kernel_function @@ -69,7 +66,7 @@ def decorated_native_function(arg1: str) -> str: return decorated_native_function -@pytest.fixture(scope="session") +@fixture(scope="session") def custom_plugin_class(): from 
semantic_kernel.functions.kernel_function_decorator import kernel_function @@ -81,7 +78,7 @@ def decorated_native_function(self) -> str: return CustomPlugin -@pytest.fixture(scope="session") +@fixture(scope="session") def experimental_plugin_class(): from semantic_kernel.functions.kernel_function_decorator import kernel_function from semantic_kernel.utils.experimental_decorator import experimental_class @@ -95,7 +92,7 @@ def decorated_native_function(self) -> str: return ExperimentalPlugin -@pytest.fixture(scope="session") +@fixture(scope="session") def create_mock_function() -> Callable: from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.functions.function_result import FunctionResult @@ -137,8 +134,10 @@ async def _invoke_internal(self, context: "FunctionInvocationContext"): return create_mock_function -@pytest.fixture(scope="function") +@fixture(scope="function") def get_tool_call_mock(): + from semantic_kernel.contents.function_call_content import FunctionCallContent + tool_call_mock = MagicMock(spec=FunctionCallContent) tool_call_mock.split_name_dict.return_value = {"arg_name": "arg_value"} tool_call_mock.to_kernel_arguments.return_value = {"arg_name": "arg_value"} @@ -155,64 +154,64 @@ def get_tool_call_mock(): return tool_call_mock -@pytest.fixture(scope="function") +@fixture(scope="function") def chat_history() -> "ChatHistory": from semantic_kernel.contents.chat_history import ChatHistory return ChatHistory() -@pytest.fixture(autouse=True) -def enable_debug_mode(): - """Set `autouse=True` to enable easy debugging for tests. - - How to debug: - 1. Ensure [snoop](https://github.com/alexmojaki/snoop) is installed - (`pip install snoop`). - 2. If you're doing print based debugging, use `pr` instead of `print`. - That is, convert `print(some_var)` to `pr(some_var)`. - 3. If you want a trace of a particular functions calls, just add `ss()` as the first - line of the function. - - Note: - ---- - It's completely fine to leave `autouse=True` in the fixture. It doesn't affect - the tests unless you use `pr` or `ss` in any test. - - Note: - ---- - When you use `ss` or `pr` in a test, pylance or mypy will complain. This is - because they don't know that we're adding these functions to the builtins. The - tests will run fine though. - """ - import builtins - - try: - import snoop - except ImportError: - warnings.warn( - "Install snoop to enable trace debugging. `pip install snoop`", - ImportWarning, - ) - return - - builtins.ss = snoop.snoop(depth=4).__enter__ - builtins.pr = snoop.pp - - -@pytest.fixture +# @fixture(autouse=True) +# def enable_debug_mode(): +# """Set `autouse=True` to enable easy debugging for tests. + +# How to debug: +# 1. Ensure [snoop](https://github.com/alexmojaki/snoop) is installed +# (`pip install snoop`). +# 2. If you're doing print based debugging, use `pr` instead of `print`. +# That is, convert `print(some_var)` to `pr(some_var)`. +# 3. If you want a trace of a particular functions calls, just add `ss()` as the first +# line of the function. + +# Note: +# ---- +# It's completely fine to leave `autouse=True` in the fixture. It doesn't affect +# the tests unless you use `pr` or `ss` in any test. + +# Note: +# ---- +# When you use `ss` or `pr` in a test, pylance or mypy will complain. This is +# because they don't know that we're adding these functions to the builtins. The +# tests will run fine though. 
+# """ +# import builtins + +# try: +# import snoop +# except ImportError: +# warnings.warn( +# "Install snoop to enable trace debugging. `pip install snoop`", +# ImportWarning, +# ) +# return + +# builtins.ss = snoop.snoop(depth=4).__enter__ +# builtins.pr = snoop.pp + + +@fixture def exclude_list(request): """Fixture that returns a list of environment variables to exclude.""" return request.param if hasattr(request, "param") else [] -@pytest.fixture +@fixture def override_env_param_dict(request): """Fixture that returns a dict of environment variables to override.""" return request.param if hasattr(request, "param") else {} -@pytest.fixture() +@fixture() def azure_openai_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): """Fixture to set environment variables for AzureOpenAISettings.""" if exclude_list is None: @@ -242,7 +241,7 @@ def azure_openai_unit_test_env(monkeypatch, exclude_list, override_env_param_dic return env_vars -@pytest.fixture() +@fixture() def openai_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): """Fixture to set environment variables for OpenAISettings.""" if exclude_list is None: @@ -270,7 +269,7 @@ def openai_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): return env_vars -@pytest.fixture() +@fixture() def mistralai_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): """Fixture to set environment variables for MistralAISettings.""" if exclude_list is None: @@ -296,7 +295,7 @@ def mistralai_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): return env_vars -@pytest.fixture() +@fixture() def aca_python_sessions_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): """Fixture to set environment variables for ACA Python Unit Tests.""" if exclude_list is None: @@ -320,7 +319,7 @@ def aca_python_sessions_unit_test_env(monkeypatch, exclude_list, override_env_pa return env_vars -@pytest.fixture() +@fixture() def azure_ai_search_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): """Fixture to set environment variables for ACA Python Unit Tests.""" if exclude_list is None: @@ -346,7 +345,7 @@ def azure_ai_search_unit_test_env(monkeypatch, exclude_list, override_env_param_ return env_vars -@pytest.fixture() +@fixture() def bing_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): """Fixture to set environment variables for BingConnector.""" if exclude_list is None: @@ -371,7 +370,7 @@ def bing_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): return env_vars -@pytest.fixture() +@fixture() def google_search_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): """Fixture to set environment variables for the Google Search Connector.""" if exclude_list is None: @@ -394,3 +393,47 @@ def google_search_unit_test_env(monkeypatch, exclude_list, override_env_param_di monkeypatch.delenv(key, raising=False) return env_vars + + +@fixture +def qdrant_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): + """Fixture to set environment variables for QdrantConnector.""" + if exclude_list is None: + exclude_list = [] + + if override_env_param_dict is None: + override_env_param_dict = {} + + env_vars = {"QDRANT_LOCATION": "http://localhost:6333"} + + env_vars.update(override_env_param_dict) + + for key, value in env_vars.items(): + if key not in exclude_list: + monkeypatch.setenv(key, value) + else: + monkeypatch.delenv(key, raising=False) + + return env_vars + + +@fixture +def redis_unit_test_env(monkeypatch, exclude_list, 
override_env_param_dict): + """Fixture to set environment variables for Redis.""" + if exclude_list is None: + exclude_list = [] + + if override_env_param_dict is None: + override_env_param_dict = {} + + env_vars = {"REDIS_CONNECTION_STRING": "redis://localhost:6379"} + + env_vars.update(override_env_param_dict) + + for key, value in env_vars.items(): + if key not in exclude_list: + monkeypatch.setenv(key, value) + else: + monkeypatch.delenv(key, raising=False) + + return env_vars diff --git a/python/tests/integration/connectors/memory/conftest.py b/python/tests/integration/connectors/memory/conftest.py index adbc03514e86..cd39b3fb46c8 100644 --- a/python/tests/integration/connectors/memory/conftest.py +++ b/python/tests/integration/connectors/memory/conftest.py @@ -1,15 +1,109 @@ # Copyright (c) Microsoft. All rights reserved. +from copy import deepcopy +from dataclasses import dataclass, field from datetime import datetime +from typing import Annotated, Any +from uuid import uuid4 import numpy as np -import pytest +from pytest import fixture +from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) from semantic_kernel.memory.memory_record import MemoryRecord +raw_record = {"id": "testid", "content": "test content", "vector": [0.1, 0.2, 0.3, 0.4, 0.5]} -@pytest.fixture(scope="module") + +@fixture +def record(): + return deepcopy(raw_record) + + +def DataModelArray(record) -> tuple[type | None, VectorStoreRecordDefinition | None, Any]: + @vectorstoremodel + @dataclass + class MyDataModelArray: + vector: Annotated[ + np.ndarray | None, + VectorStoreRecordVectorField( + index_kind="hnsw", + dimensions=5, + distance_function="cosine", + property_type="float", + serialize_function=np.ndarray.tolist, + deserialize_function=np.array, + ), + ] = None + other: str | None = None + id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4())) + content: Annotated[ + str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str") + ] = "content1" + + record["vector"] = np.array(record["vector"]) + + return MyDataModelArray, None, MyDataModelArray(**record) + + +def DataModelList(record) -> tuple[type | None, VectorStoreRecordDefinition | None, Any]: + @vectorstoremodel + @dataclass + class MyDataModelList: + vector: Annotated[ + list[float] | None, + VectorStoreRecordVectorField( + index_kind="hnsw", + dimensions=5, + distance_function="cosine", + property_type="float", + ), + ] = None + other: str | None = None + id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4())) + content: Annotated[ + str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str") + ] = "content1" + + return MyDataModelList, None, MyDataModelList(**record) + + +def DataModelPandas(record) -> tuple[type | None, VectorStoreRecordDefinition | None, Any]: + import pandas as pd + + definition = VectorStoreRecordDefinition( + fields={ + "vector": VectorStoreRecordVectorField( + name="vector", + index_kind="hnsw", + dimensions=5, + distance_function="cosine", + property_type="float", + ), + "id": VectorStoreRecordKeyField(name="id"), + "content": VectorStoreRecordDataField( + name="content", has_embedding=True, 
embedding_property_name="vector", property_type="str" + ), + }, + container_mode=True, + ) + df = pd.DataFrame([record]) + return None, definition, df + + +@fixture(scope="module") +def models(record): + return [DataModelArray(record), DataModelList(record), DataModelPandas(record)] + + +@fixture(scope="module") def memory_record1(): return MemoryRecord( id="test_id1", @@ -23,7 +117,7 @@ def memory_record1(): ) -@pytest.fixture(scope="module") +@fixture(scope="module") def memory_record2(): return MemoryRecord( id="test_id2", @@ -37,7 +131,7 @@ def memory_record2(): ) -@pytest.fixture(scope="module") +@fixture(scope="module") def memory_record3(): return MemoryRecord( id="test_id3", diff --git a/python/tests/integration/connectors/memory/test_vector_collections.py b/python/tests/integration/connectors/memory/test_vector_collections.py new file mode 100644 index 000000000000..711387068ea4 --- /dev/null +++ b/python/tests/integration/connectors/memory/test_vector_collections.py @@ -0,0 +1,175 @@ +# Copyright (c) Microsoft. All rights reserved. + + +from copy import deepcopy +from dataclasses import dataclass, field +from typing import Annotated +from uuid import uuid4 + +import numpy as np +import pytest +from pytest import fixture, mark, param + +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_store import AzureAISearchStore +from semantic_kernel.connectors.memory.qdrant.qdrant_store import QdrantStore +from semantic_kernel.connectors.memory.redis.const import RedisCollectionTypes +from semantic_kernel.connectors.memory.redis.redis_store import RedisStore +from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) + +raw_record = { + "id": "e6103c03-487f-4d7d-9c23-4723651c17f4", + "content": "test content", + "vector": [0.1, 0.2, 0.3, 0.4, 0.5], +} + + +def record(): + return deepcopy(raw_record) + + +def DataModelArray(record) -> param: + @vectorstoremodel + @dataclass + class MyDataModelArray: + vector: Annotated[ + np.ndarray | None, + VectorStoreRecordVectorField( + index_kind="hnsw", + dimensions=5, + distance_function="cosine", + property_type="float", + serialize_function=np.ndarray.tolist, + deserialize_function=np.array, + ), + ] = None + other: str | None = None + id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4())) + content: Annotated[ + str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str") + ] = "content1" + + record["vector"] = np.array(record["vector"]) + + return "array", MyDataModelArray, None, MyDataModelArray(**record) + + +def DataModelList(record) -> tuple: + @vectorstoremodel + @dataclass + class MyDataModelList: + vector: Annotated[ + list[float] | None, + VectorStoreRecordVectorField( + index_kind="hnsw", + dimensions=5, + distance_function="cosine", + property_type="float", + ), + ] = None + other: str | None = None + id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4())) + content: Annotated[ + str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str") + ] = "content1" + + return "list", MyDataModelList, None, MyDataModelList(**record) + + +def DataModelPandas(record) -> tuple: + import pandas as pd + 
+ definition = VectorStoreRecordDefinition( + fields={ + "vector": VectorStoreRecordVectorField( + name="vector", + index_kind="hnsw", + dimensions=5, + distance_function="cosine", + property_type="float", + ), + "id": VectorStoreRecordKeyField(name="id"), + "content": VectorStoreRecordDataField( + name="content", has_embedding=True, embedding_property_name="vector", property_type="str" + ), + }, + container_mode=True, + to_dict=lambda x: x.to_dict(orient="records"), + from_dict=lambda x, **_: pd.DataFrame(x), + ) + df = pd.DataFrame([record]) + return "pandas", pd.DataFrame, definition, df + + +@fixture +def collection_details(request): + match request.param: + case "array": + yield DataModelArray(record()) + case "list": + yield DataModelList(record()) + case "pandas": + yield DataModelPandas(record()) + + +@fixture +def store(request): + match request.param: + case "redis_json": + yield RedisStore(), {"collection_type": RedisCollectionTypes.JSON} + case "redis_hashset": + yield RedisStore(), {"collection_type": RedisCollectionTypes.HASHSET} + case "azure_ai_search": + yield AzureAISearchStore(), {} + case "qdrant": + yield QdrantStore(), {} + case "qdrant_in_memory": + yield QdrantStore(location=":memory:"), {} + case "qdrant_grpc": + yield QdrantStore(), {"prefer_grpc": True} + + +@fixture +@mark.asyncio +async def collection_and_data(store, collection_details): + vector_store, collection_options = store + collection_name, data_model_type, data_model_definition, data_record = collection_details + collection = vector_store.get_collection( + collection_name, data_model_type, data_model_definition, **collection_options + ) + try: + await collection.create_collection_if_not_exists() + except Exception as exc: + pytest.fail(f"Failed to create collection: {exc}") + yield collection, data_record + try: + await collection.delete_collection() + except Exception as exc: + pytest.fail(f"Failed to delete collection: {exc}") + + +@mark.asyncio +@mark.parametrize("collection_details", ["array", "list", "pandas"], indirect=True) +@mark.parametrize( + "store", + ["redis_json", "redis_hashset", "azure_ai_search", "qdrant", "qdrant_in_memory", "qdrant_grpc"], + indirect=True, +) +async def test_collections(collection_and_data): + compare_record = record() + async for collection, data_record in collection_and_data: + print("upserting record") + await collection.upsert(data_record) + print("getting record") + result = await collection.get(compare_record["id"]) + assert result is not None + print("deleting record") + await collection.delete(compare_record["id"]) + print("getting record again, expect None") + result = await collection.get(compare_record["id"]) + assert result is None diff --git a/python/tests/unit/connectors/memory/conftest.py b/python/tests/unit/connectors/memory/conftest.py new file mode 100644 index 000000000000..2a2f9265659e --- /dev/null +++ b/python/tests/unit/connectors/memory/conftest.py @@ -0,0 +1,68 @@ +# Copyright (c) Microsoft. All rights reserved. 
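The pandas fixture and the integration test above exercise the container-mode path, where a whole DataFrame acts as the record container. The following sketch (not part of this diff) shows that path end to end; the Redis JSON store choice, the collection name, and the sample data are illustrative assumptions, with REDIS_CONNECTION_STRING expected in the environment.

```python
# Hedged container-mode sketch: upsert and read back a pandas DataFrame through a Redis collection.
import asyncio

import pandas as pd

from semantic_kernel.connectors.memory.redis.const import RedisCollectionTypes
from semantic_kernel.connectors.memory.redis.redis_store import RedisStore
from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition
from semantic_kernel.data.vector_store_record_fields import (
    VectorStoreRecordDataField,
    VectorStoreRecordKeyField,
    VectorStoreRecordVectorField,
)


async def main() -> None:
    definition = VectorStoreRecordDefinition(
        fields={
            "vector": VectorStoreRecordVectorField(
                name="vector", index_kind="hnsw", dimensions=5, distance_function="cosine", property_type="float"
            ),
            "id": VectorStoreRecordKeyField(name="id"),
            "content": VectorStoreRecordDataField(
                name="content", has_embedding=True, embedding_property_name="vector", property_type="str"
            ),
        },
        container_mode=True,
        to_dict=lambda x: x.to_dict(orient="records"),
        from_dict=lambda x, **_: pd.DataFrame(x),
    )
    store = RedisStore()  # reads REDIS_CONNECTION_STRING from the environment
    collection = store.get_collection(
        "pandas_demo",
        data_model_type=pd.DataFrame,
        data_model_definition=definition,
        collection_type=RedisCollectionTypes.JSON,
    )
    await collection.create_collection_if_not_exists()
    df = pd.DataFrame([{"id": "1", "content": "hello", "vector": [0.1, 0.2, 0.3, 0.4, 0.5]}])
    keys = await collection.upsert_batch(df)
    print(await collection.get_batch(keys))
    await collection.delete_collection()


asyncio.run(main())
```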
+ +from dataclasses import dataclass, field +from typing import Annotated +from uuid import uuid4 + +from pydantic import BaseModel +from pytest import fixture + +from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( + OpenAIEmbeddingPromptExecutionSettings, +) +from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) + + +@fixture +def dataclass_vector_data_model() -> object: + @vectorstoremodel + @dataclass + class MyDataModel: + vector: Annotated[ + list[float] | None, + VectorStoreRecordVectorField( + embedding_settings={"default": OpenAIEmbeddingPromptExecutionSettings(dimensions=1536)}, + index_kind="hnsw", + dimensions=1536, + distance_function="cosine", + property_type="float", + ), + ] = None + other: str | None = None + id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4())) + content: Annotated[ + str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str") + ] = "content1" + + return MyDataModel + + +@fixture +def data_model_definition() -> object: + return VectorStoreRecordDefinition( + fields={ + "id": VectorStoreRecordKeyField(), + "content": VectorStoreRecordDataField( + has_embedding=True, + embedding_property_name="vector", + ), + "vector": VectorStoreRecordVectorField(dimensions=3), + } + ) + + +@fixture +def data_model_type(): + @vectorstoremodel + class DataModelClass(BaseModel): + content: Annotated[str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector")] + vector: Annotated[list[float], VectorStoreRecordVectorField()] + id: Annotated[str, VectorStoreRecordKeyField()] + + return DataModelClass diff --git a/python/tests/unit/connectors/memory/test_azure_ai_search.py b/python/tests/unit/connectors/memory/test_azure_ai_search.py new file mode 100644 index 000000000000..8e475d1faa58 --- /dev/null +++ b/python/tests/unit/connectors/memory/test_azure_ai_search.py @@ -0,0 +1,312 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ + +from unittest.mock import MagicMock, Mock, patch + +from pytest import fixture, mark, raises + +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_collection import AzureAISearchCollection +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_settings import AzureAISearchSettings +from semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_store import AzureAISearchStore +from semantic_kernel.connectors.memory.azure_ai_search.utils import ( + SearchClientWrapper, + SearchIndexClientWrapper, + data_model_definition_to_azure_ai_search_index, + get_search_index_client, +) +from semantic_kernel.exceptions.memory_connector_exceptions import ( + MemoryConnectorException, + MemoryConnectorInitializationError, +) +from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError + +BASE_PATH_SEARCH_CLIENT = "azure.search.documents.aio.SearchClient" +BASE_PATH_INDEX_CLIENT = "azure.search.documents.indexes.aio.SearchIndexClient" + + +class AsyncIter: + def __init__(self, items): + self.items = items + + async def __aiter__(self): + for item in self.items: + yield item + + +@fixture +def vector_store(azure_ai_search_unit_test_env): + """Fixture to instantiate AzureCognitiveSearchMemoryStore with basic configuration.""" + return AzureAISearchStore() + + +@fixture +def mock_create_collection(): + """Fixture to patch 'SearchIndexClient' and its 'create_index' method.""" + with patch(f"{BASE_PATH_INDEX_CLIENT}.create_index") as mock_create_index: + yield mock_create_index + + +@fixture +def mock_delete_collection(): + """Fixture to patch 'SearchIndexClient' and its 'create_index' method.""" + with patch(f"{BASE_PATH_INDEX_CLIENT}.delete_index") as mock_delete_index: + yield mock_delete_index + + +@fixture +def mock_list_collection_names(): + """Fixture to patch 'SearchIndexClient' and its 'create_index' method.""" + with patch(f"{BASE_PATH_INDEX_CLIENT}.list_index_names") as mock_list_index_names: + # Setup the mock to return a specific SearchIndex instance when called + mock_list_index_names.return_value = AsyncIter(["test"]) + yield mock_list_index_names + + +@fixture +def mock_upsert(): + with patch(f"{BASE_PATH_SEARCH_CLIENT}.merge_or_upload_documents") as mock_merge_or_upload_documents: + from azure.search.documents.models import IndexingResult + + result = MagicMock(spec=IndexingResult) + result.key = "id1" + mock_merge_or_upload_documents.return_value = [result] + yield mock_merge_or_upload_documents + + +@fixture +def mock_get(): + with patch(f"{BASE_PATH_SEARCH_CLIENT}.get_document") as mock_get_document: + mock_get_document.return_value = {"id": "id1", "content": "content", "vector": [1.0, 2.0, 3.0]} + yield mock_get_document + + +@fixture +def mock_delete(): + with patch(f"{BASE_PATH_SEARCH_CLIENT}.delete_documents") as mock_delete_documents: + yield mock_delete_documents + + +@fixture +def collection(azure_ai_search_unit_test_env, data_model_definition): + return AzureAISearchCollection(data_model_type=dict, data_model_definition=data_model_definition) + + +def test_init(azure_ai_search_unit_test_env, data_model_definition): + collection = AzureAISearchCollection(data_model_type=dict, data_model_definition=data_model_definition) + assert collection is not None + assert collection.data_model_type is dict + assert collection.data_model_definition == data_model_definition + assert collection.collection_name == "test-index-name" + assert collection.search_index_client is not None + assert collection.search_client is not 
None + + +def test_init_with_type(azure_ai_search_unit_test_env, data_model_type): + collection = AzureAISearchCollection(data_model_type=data_model_type) + assert collection is not None + assert collection.data_model_type is data_model_type + assert collection.collection_name == "test-index-name" + assert collection.search_index_client is not None + assert collection.search_client is not None + + +@mark.parametrize("exclude_list", [["AZURE_AI_SEARCH_ENDPOINT"]], indirect=True) +def test_init_endpoint_fail(azure_ai_search_unit_test_env, data_model_definition): + with raises(MemoryConnectorInitializationError): + AzureAISearchCollection( + data_model_type=dict, data_model_definition=data_model_definition, env_file_path="test.env" + ) + + +@mark.parametrize("exclude_list", [["AZURE_AI_SEARCH_INDEX_NAME"]], indirect=True) +def test_init_index_fail(azure_ai_search_unit_test_env, data_model_definition): + with raises(MemoryConnectorInitializationError): + AzureAISearchCollection( + data_model_type=dict, data_model_definition=data_model_definition, env_file_path="test.env" + ) + + +def test_init_with_clients(azure_ai_search_unit_test_env, data_model_definition): + search_index_client = MagicMock(spec=SearchIndexClientWrapper) + search_client = MagicMock(spec=SearchClientWrapper) + search_client._index_name = "test-index-name" + + collection = AzureAISearchCollection( + data_model_type=dict, + data_model_definition=data_model_definition, + search_index_client=search_index_client, + search_client=search_client, + ) + assert collection is not None + assert collection.data_model_type is dict + assert collection.data_model_definition == data_model_definition + assert collection.collection_name == "test-index-name" + assert collection.search_index_client == search_index_client + assert collection.search_client == search_client + + +def test_init_with_search_index_client(azure_ai_search_unit_test_env, data_model_definition): + search_index_client = MagicMock(spec=SearchIndexClientWrapper) + with patch( + "semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_collection.get_search_client" + ) as get_search_client: + search_client = MagicMock(spec=SearchClientWrapper) + get_search_client.return_value = search_client + + collection = AzureAISearchCollection( + data_model_type=dict, + data_model_definition=data_model_definition, + collection_name="test", + search_index_client=search_index_client, + ) + assert collection is not None + assert collection.data_model_type is dict + assert collection.data_model_definition == data_model_definition + assert collection.collection_name == "test" + assert collection.search_index_client == search_index_client + assert collection.search_client == search_client + + +def test_init_with_search_index_client_fail(azure_ai_search_unit_test_env, data_model_definition): + search_index_client = MagicMock(spec=SearchIndexClientWrapper) + with raises(MemoryConnectorInitializationError, match="Collection name is required."): + AzureAISearchCollection( + data_model_type=dict, + data_model_definition=data_model_definition, + search_index_client=search_index_client, + ) + + +def test_init_with_clients_fail(azure_ai_search_unit_test_env, data_model_definition): + search_index_client = MagicMock(spec=SearchIndexClientWrapper) + search_client = MagicMock(spec=SearchClientWrapper) + search_client._index_name = "test-index-name" + + with raises( + MemoryConnectorInitializationError, match="Search client and search index client have different index names." 
+ ): + AzureAISearchCollection( + data_model_type=dict, + data_model_definition=data_model_definition, + collection_name="test", + search_index_client=search_index_client, + search_client=search_client, + ) + + +@mark.asyncio +async def test_upsert(collection, mock_upsert): + ids = await collection._inner_upsert({"id": "id1", "name": "test"}) + assert ids[0] == "id1" + + ids = await collection.upsert(record={"id": "id1", "content": "content", "vector": [1.0, 2.0, 3.0]}) + assert ids == "id1" + + +@mark.asyncio +async def test_get(collection, mock_get): + records = await collection._inner_get(["id1"]) + assert records is not None + + records = await collection.get("id1") + assert records is not None + + +@mark.asyncio +async def test_delete(collection, mock_delete): + await collection._inner_delete(["id1"]) + + +@mark.asyncio +async def test_does_collection_exist(collection, mock_list_collection_names): + await collection.does_collection_exist() + + +@mark.asyncio +async def test_delete_collection(collection, mock_delete_collection): + await collection.delete_collection() + + +@mark.asyncio +async def test_create_index_from_index(collection, mock_create_collection): + from azure.search.documents.indexes.models import SearchIndex + + index = MagicMock(spec=SearchIndex) + await collection.create_collection(index=index) + + +@mark.asyncio +async def test_create_index_from_definition(collection, mock_create_collection): + from azure.search.documents.indexes.models import SearchIndex + + with patch( + "semantic_kernel.connectors.memory.azure_ai_search.azure_ai_search_collection.data_model_definition_to_azure_ai_search_index", + return_value=MagicMock(spec=SearchIndex), + ): + await collection.create_collection() + + +@mark.asyncio +async def test_create_index_from_index_fail(collection, mock_create_collection): + index = Mock() + with raises(MemoryConnectorException): + await collection.create_collection(index=index) + + +def test_data_model_definition_to_azure_ai_search_index(data_model_definition): + index = data_model_definition_to_azure_ai_search_index("test", data_model_definition) + assert index is not None + assert index.name == "test" + assert len(index.fields) == 3 + + +@mark.asyncio +@mark.parametrize("exclude_list", [["AZURE_AI_SEARCH_ENDPOINT"]], indirect=True) +async def test_vector_store_fail(azure_ai_search_unit_test_env): + with raises(MemoryConnectorInitializationError): + AzureAISearchStore(env_file_path="test.env") + + +@mark.asyncio +async def test_vector_store_list_collection_names(vector_store, mock_list_collection_names): + assert vector_store.search_index_client is not None + collection_names = await vector_store.list_collection_names() + assert collection_names == ["test"] + mock_list_collection_names.assert_called_once() + + +def test_get_collection(vector_store, data_model_definition): + collection = vector_store.get_collection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + assert collection is not None + assert collection.collection_name == "test" + assert collection.search_index_client == vector_store.search_index_client + assert collection.search_client is not None + assert collection.search_client._endpoint == vector_store.search_index_client._endpoint + assert vector_store.vector_record_collections["test"] == collection + + +@mark.parametrize("exclude_list", [["AZURE_AI_SEARCH_API_KEY"]], indirect=True) +def test_get_search_index_client(azure_ai_search_unit_test_env): + from azure.core.credentials import 
AzureKeyCredential, TokenCredential + + settings = AzureAISearchSettings.create(**azure_ai_search_unit_test_env, env_file_path="test.env") + + azure_credential = MagicMock(spec=AzureKeyCredential) + client = get_search_index_client(settings, azure_credential=azure_credential) + assert client is not None + assert client._credential == azure_credential + + token_credential = MagicMock(spec=TokenCredential) + client2 = get_search_index_client( + settings, + token_credential=token_credential, + ) + assert client2 is not None + assert client2._credential == token_credential + + with raises(ServiceInitializationError): + get_search_index_client(settings) diff --git a/python/tests/unit/connectors/memory/test_qdrant.py b/python/tests/unit/connectors/memory/test_qdrant.py new file mode 100644 index 000000000000..294fe729cdd4 --- /dev/null +++ b/python/tests/unit/connectors/memory/test_qdrant.py @@ -0,0 +1,271 @@ +# Copyright (c) Microsoft. All rights reserved. + +from unittest.mock import MagicMock, patch + +from pytest import fixture, mark, raises +from qdrant_client.async_qdrant_client import AsyncQdrantClient +from qdrant_client.models import Datatype, Distance, VectorParams + +from semantic_kernel.connectors.memory.qdrant.qdrant_collection import QdrantCollection +from semantic_kernel.connectors.memory.qdrant.qdrant_store import QdrantStore +from semantic_kernel.data.vector_store_record_fields import VectorStoreRecordVectorField +from semantic_kernel.exceptions.memory_connector_exceptions import ( + MemoryConnectorException, + MemoryConnectorInitializationError, + VectorStoreModelValidationError, +) + +BASE_PATH = "qdrant_client.async_qdrant_client.AsyncQdrantClient" + + +@fixture +def vector_store(qdrant_unit_test_env): + return QdrantStore(env_file_path="test.env") + + +@fixture +def collection(qdrant_unit_test_env, data_model_definition): + return QdrantCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + env_file_path="test.env", + ) + + +@fixture +def collection_without_named_vectors(qdrant_unit_test_env, data_model_definition): + return QdrantCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + named_vectors=False, + env_file_path="test.env", + ) + + +@fixture(autouse=True) +def mock_list_collection_names(): + with patch(f"{BASE_PATH}.get_collections") as mock_get_collections: + from qdrant_client.conversions.common_types import CollectionsResponse + from qdrant_client.http.models import CollectionDescription + + response = MagicMock(spec=CollectionsResponse) + response.collections = [CollectionDescription(name="test")] + mock_get_collections.return_value = response + yield mock_get_collections + + +@fixture(autouse=True) +def mock_does_collection_exist(): + with patch(f"{BASE_PATH}.collection_exists") as mock_collection_exists: + mock_collection_exists.return_value = True + yield mock_collection_exists + + +@fixture(autouse=True) +def mock_create_collection(): + with patch(f"{BASE_PATH}.create_collection") as mock_recreate_collection: + yield mock_recreate_collection + + +@fixture(autouse=True) +def mock_delete_collection(): + with patch(f"{BASE_PATH}.delete_collection") as mock_delete_collection: + mock_delete_collection.return_value = True + yield mock_delete_collection + + +@fixture(autouse=True) +def mock_upsert(): + with patch(f"{BASE_PATH}.upsert") as mock_upsert: + from qdrant_client.conversions.common_types import UpdateResult + + result = 
MagicMock(spec=UpdateResult) + result.status = "completed" + mock_upsert.return_value = result + yield mock_upsert + + +@fixture(autouse=True) +def mock_get(collection): + with patch(f"{BASE_PATH}.retrieve") as mock_retrieve: + from qdrant_client.http.models import Record + + if collection.named_vectors: + mock_retrieve.return_value = [ + Record(id="id1", payload={"content": "content"}, vector={"vector": [1.0, 2.0, 3.0]}) + ] + else: + mock_retrieve.return_value = [Record(id="id1", payload={"content": "content"}, vector=[1.0, 2.0, 3.0])] + yield mock_retrieve + + +@fixture(autouse=True) +def mock_delete(): + with patch(f"{BASE_PATH}.delete") as mock_delete: + yield mock_delete + + +def test_vector_store_defaults(vector_store): + assert vector_store.qdrant_client is not None + assert vector_store.qdrant_client._client.rest_uri == "http://localhost:6333" + + +def test_vector_store_with_client(): + qdrant_store = QdrantStore(client=AsyncQdrantClient()) + assert qdrant_store.qdrant_client is not None + assert qdrant_store.qdrant_client._client.rest_uri == "http://localhost:6333" + + +@mark.parametrize("exclude_list", [["QDRANT_LOCATION"]], indirect=True) +def test_vector_store_in_memory(qdrant_unit_test_env): + from qdrant_client.local.async_qdrant_local import AsyncQdrantLocal + + qdrant_store = QdrantStore(api_key="supersecretkey", env_file_path="test.env") + assert qdrant_store.qdrant_client is not None + assert isinstance(qdrant_store.qdrant_client._client, AsyncQdrantLocal) + assert qdrant_store.qdrant_client._client.location == ":memory:" + + +def test_vector_store_fail(): + with raises(MemoryConnectorInitializationError, match="Failed to create Qdrant settings."): + QdrantStore(location="localhost", url="localhost", env_file_path="test.env") + + with raises(MemoryConnectorInitializationError, match="Failed to create Qdrant client."): + QdrantStore(location="localhost", url="http://localhost", env_file_path="test.env") + + +@mark.asyncio +async def test_store_list_collection_names(vector_store): + collections = await vector_store.list_collection_names() + assert collections == ["test"] + + +def test_get_collection(vector_store, data_model_definition, qdrant_unit_test_env): + collection = vector_store.get_collection("test", data_model_type=dict, data_model_definition=data_model_definition) + assert collection.collection_name == "test" + assert collection.qdrant_client == vector_store.qdrant_client + assert collection.data_model_type is dict + assert collection.data_model_definition == data_model_definition + assert vector_store.vector_record_collections["test"] == collection + + +def test_collection_init(data_model_definition, qdrant_unit_test_env): + collection = QdrantCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + env_file_path="test.env", + ) + assert collection.collection_name == "test" + assert collection.qdrant_client is not None + assert collection.data_model_type is dict + assert collection.data_model_definition == data_model_definition + assert collection.named_vectors + + +def test_collection_init_fail(data_model_definition): + with raises(MemoryConnectorInitializationError, match="Failed to create Qdrant settings."): + QdrantCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + url="localhost", + env_file_path="test.env", + ) + with raises(MemoryConnectorInitializationError, match="Failed to create Qdrant client."): + QdrantCollection( + data_model_type=dict, 
+ collection_name="test", + data_model_definition=data_model_definition, + location="localhost", + url="http://localhost", + env_file_path="test.env", + ) + with raises( + VectorStoreModelValidationError, match="Only one vector field is allowed when not using named vectors." + ): + data_model_definition.fields["vector2"] = VectorStoreRecordVectorField(name="vector2", dimensions=3) + QdrantCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + named_vectors=False, + env_file_path="test.env", + ) + + +@mark.asyncio +@mark.parametrize("collection_to_use", ["collection", "collection_without_named_vectors"]) +async def test_upsert(collection_to_use, request): + from qdrant_client.models import PointStruct + + collection = request.getfixturevalue(collection_to_use) + if collection.named_vectors: + record = PointStruct(id="id1", payload={"content": "content"}, vector={"vector": [1.0, 2.0, 3.0]}) + else: + record = PointStruct(id="id1", payload={"content": "content"}, vector=[1.0, 2.0, 3.0]) + ids = await collection._inner_upsert([record]) + assert ids[0] == "id1" + + ids = await collection.upsert(record={"id": "id1", "content": "content", "vector": [1.0, 2.0, 3.0]}) + assert ids == "id1" + + +@mark.asyncio +async def test_get(collection): + records = await collection._inner_get(["id1"]) + assert records is not None + + records = await collection.get("id1") + assert records is not None + + +@mark.asyncio +async def test_delete(collection): + await collection._inner_delete(["id1"]) + + +@mark.asyncio +async def test_does_collection_exist(collection): + await collection.does_collection_exist() + + +@mark.asyncio +async def test_delete_collection(collection): + await collection.delete_collection() + + +@mark.asyncio +@mark.parametrize( + "collection_to_use, results", + [ + ( + "collection", + { + "collection_name": "test", + "vectors_config": {"vector": VectorParams(size=3, distance=Distance.COSINE, datatype=Datatype.FLOAT32)}, + }, + ), + ( + "collection_without_named_vectors", + { + "collection_name": "test", + "vectors_config": VectorParams(size=3, distance=Distance.COSINE, datatype=Datatype.FLOAT32), + }, + ), + ], +) +async def test_create_index_with_named_vectors(collection_to_use, results, mock_create_collection, request): + await request.getfixturevalue(collection_to_use).create_collection() + mock_create_collection.assert_called_once_with(**results) + + +@mark.asyncio +@mark.parametrize("collection_to_use", ["collection", "collection_without_named_vectors"]) +async def test_create_index_fail(collection_to_use, request): + collection = request.getfixturevalue(collection_to_use) + collection.data_model_definition.fields["vector"].dimensions = None + with raises(MemoryConnectorException, match="Vector field must have dimensions."): + await collection.create_collection() diff --git a/python/tests/unit/connectors/memory/test_redis_store.py b/python/tests/unit/connectors/memory/test_redis_store.py new file mode 100644 index 000000000000..f233bbc73e9d --- /dev/null +++ b/python/tests/unit/connectors/memory/test_redis_store.py @@ -0,0 +1,341 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from unittest.mock import AsyncMock, patch + +import numpy as np +from pytest import fixture, mark, raises +from redis.asyncio.client import Redis + +from semantic_kernel.connectors.memory.redis.const import RedisCollectionTypes +from semantic_kernel.connectors.memory.redis.redis_collection import RedisHashsetCollection, RedisJsonCollection +from semantic_kernel.connectors.memory.redis.redis_store import RedisStore +from semantic_kernel.exceptions.memory_connector_exceptions import ( + MemoryConnectorException, + MemoryConnectorInitializationError, +) + +BASE_PATH = "redis.asyncio.client.Redis" +BASE_PATH_FT = "redis.commands.search.AsyncSearch" +BASE_PATH_JSON = "redis.commands.json.commands.JSONCommands" + + +@fixture +def vector_store(redis_unit_test_env): + return RedisStore(env_file_path="test.env") + + +@fixture +def collection_hash(redis_unit_test_env, data_model_definition): + return RedisHashsetCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + env_file_path="test.env", + ) + + +@fixture +def collection_json(redis_unit_test_env, data_model_definition): + return RedisJsonCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + env_file_path="test.env", + ) + + +@fixture +def collection_with_prefix_hash(redis_unit_test_env, data_model_definition): + return RedisHashsetCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + prefix_collection_name_to_key_names=True, + env_file_path="test.env", + ) + + +@fixture +def collection_with_prefix_json(redis_unit_test_env, data_model_definition): + return RedisJsonCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + prefix_collection_name_to_key_names=True, + env_file_path="test.env", + ) + + +@fixture(autouse=True) +def mock_list_collection_names(): + with patch(f"{BASE_PATH}.execute_command") as mock_get_collections: + mock_get_collections.return_value = [b"test"] + yield mock_get_collections + + +@fixture(autouse=True) +def mock_does_collection_exist(): + with patch(f"{BASE_PATH_FT}.info", new=AsyncMock()) as mock_collection_exists: + mock_collection_exists.return_value = True + yield mock_collection_exists + + +@fixture(autouse=True) +def mock_create_collection(): + with patch(f"{BASE_PATH_FT}.create_index", new=AsyncMock()) as mock_recreate_collection: + yield mock_recreate_collection + + +@fixture(autouse=True) +def mock_delete_collection(): + with patch(f"{BASE_PATH_FT}.dropindex", new=AsyncMock()) as mock_delete_collection: + yield mock_delete_collection + + +@fixture(autouse=True) +def mock_upsert_hash(): + with patch(f"{BASE_PATH}.hset", new=AsyncMock()) as mock_upsert: + yield mock_upsert + + +@fixture(autouse=True) +def mock_upsert_json(): + with patch(f"{BASE_PATH_JSON}.set", new=AsyncMock()) as mock_upsert: + yield mock_upsert + + +@fixture(autouse=True) +def mock_get_hash(): + with patch(f"{BASE_PATH}.hgetall", new=AsyncMock()) as mock_get: + mock_get.return_value = { + b"metadata": b'{"content": "content"}', + b"vector": np.array([1.0, 2.0, 3.0]).tobytes(), + } + yield mock_get + + +@fixture(autouse=True) +def mock_get_json(): + with patch(f"{BASE_PATH_JSON}.mget", new=AsyncMock()) as mock_get: + mock_get.return_value = [ + [ + { + "content": "content", + "vector": [1.0, 2.0, 3.0], + } + ] + ] + yield mock_get + + +@fixture(autouse=True) +def mock_delete_hash(): + with patch(f"{BASE_PATH}.delete",
new=AsyncMock()) as mock_delete: + yield mock_delete + + +@fixture(autouse=True) +def mock_delete_json(): + with patch(f"{BASE_PATH_JSON}.delete", new=AsyncMock()) as mock_delete: + yield mock_delete + + +def test_vector_store_defaults(vector_store): + assert vector_store.redis_database is not None + assert vector_store.redis_database.connection_pool.connection_kwargs["host"] == "localhost" + + +def test_vector_store_with_client(redis_unit_test_env): + vector_store = RedisStore(redis_database=Redis.from_url(redis_unit_test_env["REDIS_CONNECTION_STRING"])) + assert vector_store.redis_database is not None + assert vector_store.redis_database.connection_pool.connection_kwargs["host"] == "localhost" + + +@mark.parametrize("exclude_list", [["REDIS_CONNECTION_STRING"]], indirect=True) +def test_vector_store_fail(redis_unit_test_env): + with raises(MemoryConnectorInitializationError, match="Failed to create Redis settings."): + RedisStore(env_file_path="test.env") + + +@mark.asyncio +async def test_store_list_collection_names(vector_store, mock_list_collection_names): + collections = await vector_store.list_collection_names() + assert collections == ["test"] + + +@mark.parametrize("type_", ["hashset", "json"]) +def test_get_collection(vector_store, data_model_definition, type_): + if type_ == "hashset": + collection = vector_store.get_collection( + "test", + data_model_type=dict, + data_model_definition=data_model_definition, + collection_type=RedisCollectionTypes.HASHSET, + ) + assert isinstance(collection, RedisHashsetCollection) + else: + collection = vector_store.get_collection( + "test", + data_model_type=dict, + data_model_definition=data_model_definition, + collection_type=RedisCollectionTypes.JSON, + ) + assert isinstance(collection, RedisJsonCollection) + assert collection.collection_name == "test" + assert collection.redis_database == vector_store.redis_database + assert collection.data_model_type is dict + assert collection.data_model_definition == data_model_definition + assert vector_store.vector_record_collections["test"] == collection + + +@mark.parametrize("type_", ["hashset", "json"]) +def test_collection_init(redis_unit_test_env, data_model_definition, type_): + if type_ == "hashset": + collection = RedisHashsetCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + env_file_path="test.env", + ) + else: + collection = RedisJsonCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + env_file_path="test.env", + ) + assert collection.collection_name == "test" + assert collection.redis_database is not None + assert collection.data_model_type is dict + assert collection.data_model_definition == data_model_definition + assert collection.prefix_collection_name_to_key_names is False + + +@mark.parametrize("type_", ["hashset", "json"]) +def test_init_with_type(redis_unit_test_env, data_model_type, type_): + if type_ == "hashset": + collection = RedisHashsetCollection(data_model_type=data_model_type, collection_name="test") + else: + collection = RedisJsonCollection(data_model_type=data_model_type, collection_name="test") + assert collection is not None + assert collection.data_model_type is data_model_type + assert collection.collection_name == "test" + + +@mark.parametrize("exclude_list", [["REDIS_CONNECTION_STRING"]], indirect=True) +def test_collection_fail(redis_unit_test_env, data_model_definition): + with raises(MemoryConnectorInitializationError, match="Failed to create
Redis settings."): + RedisHashsetCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + env_file_path="test.env", + ) + with raises(MemoryConnectorInitializationError, match="Failed to create Redis settings."): + RedisJsonCollection( + data_model_type=dict, + collection_name="test", + data_model_definition=data_model_definition, + env_file_path="test.env", + ) + + +@mark.asyncio +@mark.parametrize("type_", ["hashset", "json"]) +async def test_upsert(collection_hash, collection_json, type_): + if type_ == "hashset": + record = { + "name": "id1", + "mapping": { + "metadata": {"content": "content"}, + "vector": [1.0, 2.0, 3.0], + }, + } + else: + record = { + "name": "id1", + "value": { + "content": "content", + "vector": [1.0, 2.0, 3.0], + }, + } + collection = collection_hash if type_ == "hashset" else collection_json + ids = await collection._inner_upsert([record]) + assert ids[0] == "id1" + + ids = await collection.upsert(record={"id": "id1", "content": "content", "vector": [1.0, 2.0, 3.0]}) + assert ids == "id1" + + +@mark.asyncio +async def test_upsert_with_prefix(collection_with_prefix_hash, collection_with_prefix_json): + ids = await collection_with_prefix_hash.upsert( + record={"id": "id1", "content": "content", "vector": [1.0, 2.0, 3.0]} + ) + assert ids == "id1" + ids = await collection_with_prefix_json.upsert( + record={"id": "id1", "content": "content", "vector": [1.0, 2.0, 3.0]} + ) + assert ids == "id1" + + +@mark.asyncio +@mark.parametrize("prefix", [True, False]) +@mark.parametrize("type_", ["hashset", "json"]) +async def test_get( + collection_hash, collection_json, collection_with_prefix_hash, collection_with_prefix_json, type_, prefix +): + if prefix: + collection = collection_with_prefix_hash if type_ == "hashset" else collection_with_prefix_json + else: + collection = collection_hash if type_ == "hashset" else collection_json + records = await collection._inner_get(["id1"]) + assert records is not None + + records = await collection.get("id1") + assert records is not None + + +@mark.asyncio +@mark.parametrize("type_", ["hashset", "json"]) +async def test_delete(collection_hash, collection_json, type_): + collection = collection_hash if type_ == "hashset" else collection_json + await collection._inner_delete(["id1"]) + + +@mark.asyncio +async def test_does_collection_exist(collection_hash, mock_does_collection_exist): + await collection_hash.does_collection_exist() + + +@mark.asyncio +async def test_does_collection_exist_false(collection_hash, mock_does_collection_exist): + mock_does_collection_exist.side_effect = Exception + exists = await collection_hash.does_collection_exist() + assert not exists + + +@mark.asyncio +async def test_delete_collection(collection_hash, mock_delete_collection): + await collection_hash.delete_collection() + await collection_hash.delete_collection() + + +@mark.asyncio +async def test_create_index(collection_hash, mock_create_collection): + await collection_hash.create_collection() + + +@mark.asyncio +async def test_create_index_manual(collection_hash, mock_create_collection): + from redis.commands.search.indexDefinition import IndexDefinition, IndexType + + fields = ["fields"] + index_definition = IndexDefinition(prefix="test:", index_type=IndexType.HASH) + await collection_hash.create_collection(index_definition=index_definition, fields=fields) + + +@mark.asyncio +async def test_create_index_fail(collection_hash, mock_create_collection): + with raises(MemoryConnectorException, 
match="Invalid index type supplied."): + await collection_hash.create_collection(index_definition="index_definition", fields="fields") diff --git a/python/tests/unit/connectors/memory/test_volatile.py b/python/tests/unit/connectors/memory/test_volatile.py new file mode 100644 index 000000000000..84ed8e86087c --- /dev/null +++ b/python/tests/unit/connectors/memory/test_volatile.py @@ -0,0 +1,76 @@ +# Copyright (c) Microsoft. All rights reserved. + +from pytest import fixture, mark + +from semantic_kernel.connectors.memory.volatile.volatile_collection import VolatileCollection +from semantic_kernel.connectors.memory.volatile.volatile_store import VolatileStore + + +@fixture +def collection(data_model_definition): + return VolatileCollection("test", dict, data_model_definition) + + +def test_store_init(): + store = VolatileStore() + assert store.vector_record_collections == {} + + +@mark.asyncio +async def test_store_get_collection(data_model_definition): + store = VolatileStore() + collection = store.get_collection("test", dict, data_model_definition) + assert collection.collection_name == "test" + assert collection.data_model_type is dict + assert collection.data_model_definition == data_model_definition + assert collection.inner_storage == {} + assert (await store.list_collection_names()) == ["test"] + + +@mark.asyncio +async def test_upsert(collection): + record = {"id": "testid", "content": "test content", "vector": [0.1, 0.2, 0.3, 0.4, 0.5]} + key = await collection.upsert(record) + assert key == "testid" + assert collection.inner_storage == {"testid": record} + + +@mark.asyncio +async def test_get(collection): + record = {"id": "testid", "content": "test content", "vector": [0.1, 0.2, 0.3, 0.4, 0.5]} + await collection.upsert(record) + result = await collection.get("testid") + assert result == record + + +@mark.asyncio +async def test_get_missing(collection): + result = await collection.get("testid") + assert result is None + + +@mark.asyncio +async def test_delete(collection): + record = {"id": "testid", "content": "test content", "vector": [0.1, 0.2, 0.3, 0.4, 0.5]} + await collection.upsert(record) + await collection.delete("testid") + assert collection.inner_storage == {} + + +@mark.asyncio +async def test_does_collection_exist(collection): + assert await collection.does_collection_exist() is True + + +@mark.asyncio +async def test_delete_collection(collection): + record = {"id": "testid", "content": "test content", "vector": [0.1, 0.2, 0.3, 0.4, 0.5]} + await collection.upsert(record) + assert collection.inner_storage == {"testid": record} + await collection.delete_collection() + assert collection.inner_storage == {} + + +@mark.asyncio +async def test_create_collection(collection): + await collection.create_collection() diff --git a/python/tests/unit/connectors/test_function_choice_behavior.py b/python/tests/unit/connectors/test_function_choice_behavior.py index 5d8c6bd2301a..89e211881c08 100644 --- a/python/tests/unit/connectors/test_function_choice_behavior.py +++ b/python/tests/unit/connectors/test_function_choice_behavior.py @@ -32,20 +32,20 @@ def update_settings_callback(): def test_function_choice_behavior_auto(): behavior = FunctionChoiceBehavior.Auto(auto_invoke=True) - assert behavior.type == FunctionChoiceType.AUTO + assert behavior.type_ == FunctionChoiceType.AUTO assert behavior.maximum_auto_invoke_attempts == DEFAULT_MAX_AUTO_INVOKE_ATTEMPTS def test_function_choice_behavior_none_invoke(): behavior = FunctionChoiceBehavior.NoneInvoke() - assert behavior.type == 
FunctionChoiceType.NONE + assert behavior.type_ == FunctionChoiceType.NONE assert behavior.maximum_auto_invoke_attempts == 0 def test_function_choice_behavior_required(): expected_filters = {"included_functions": ["plugin1-func1"]} behavior = FunctionChoiceBehavior.Required(auto_invoke=True, filters=expected_filters) - assert behavior.type == FunctionChoiceType.REQUIRED + assert behavior.type_ == FunctionChoiceType.REQUIRED assert behavior.maximum_auto_invoke_attempts == 1 assert behavior.filters == expected_filters @@ -53,14 +53,14 @@ def test_function_choice_behavior_required(): def test_from_function_call_behavior_kernel_functions(): behavior = FunctionCallBehavior.AutoInvokeKernelFunctions() new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - assert new_behavior.type == FunctionChoiceType.AUTO + assert new_behavior.type_ == FunctionChoiceType.AUTO assert new_behavior.auto_invoke_kernel_functions is True def test_from_function_call_behavior_required(): behavior = FunctionCallBehavior.RequiredFunction(auto_invoke=True, function_fully_qualified_name="plugin1-func1") new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - assert new_behavior.type == FunctionChoiceType.REQUIRED + assert new_behavior.type_ == FunctionChoiceType.REQUIRED assert new_behavior.auto_invoke_kernel_functions is True assert new_behavior.filters == {"included_functions": ["plugin1-func1"]} @@ -69,7 +69,7 @@ def test_from_function_call_behavior_enabled_functions(): expected_filters = {"included_functions": ["plugin1-func1"]} behavior = FunctionCallBehavior.EnableFunctions(auto_invoke=True, filters=expected_filters) new_behavior = FunctionChoiceBehavior.from_function_call_behavior(behavior) - assert new_behavior.type == FunctionChoiceType.AUTO + assert new_behavior.type_ == FunctionChoiceType.AUTO assert new_behavior.auto_invoke_kernel_functions is True assert new_behavior.filters == expected_filters @@ -90,7 +90,7 @@ def test_auto_function_choice_behavior_from_dict(type: str, max_auto_invoke_atte "maximum_auto_invoke_attempts": max_auto_invoke_attempts, } behavior = FunctionChoiceBehavior.from_dict(data) - assert behavior.type == FunctionChoiceType(type) + assert behavior.type_ == FunctionChoiceType(type) assert behavior.filters == {"included_functions": ["plugin1-func1", "plugin2-func2"]} assert behavior.maximum_auto_invoke_attempts == max_auto_invoke_attempts @@ -106,7 +106,7 @@ def test_auto_function_choice_behavior_from_dict_with_same_filters_and_functions "maximum_auto_invoke_attempts": max_auto_invoke_attempts, } behavior = FunctionChoiceBehavior.from_dict(data) - assert behavior.type == FunctionChoiceType(type) + assert behavior.type_ == FunctionChoiceType(type) assert behavior.filters == {"included_functions": ["plugin1-func1", "plugin2-func2"]} assert behavior.maximum_auto_invoke_attempts == max_auto_invoke_attempts @@ -122,7 +122,7 @@ def test_auto_function_choice_behavior_from_dict_with_different_filters_and_func "maximum_auto_invoke_attempts": max_auto_invoke_attempts, } behavior = FunctionChoiceBehavior.from_dict(data) - assert behavior.type == FunctionChoiceType(type) + assert behavior.type_ == FunctionChoiceType(type) assert behavior.filters == {"included_functions": ["plugin1-func1", "plugin2-func2", "plugin3-func3"]} assert behavior.maximum_auto_invoke_attempts == max_auto_invoke_attempts diff --git a/python/tests/unit/data/conftest.py b/python/tests/unit/data/conftest.py new file mode 100644 index 000000000000..fd8532dc1896 --- /dev/null +++ 
b/python/tests/unit/data/conftest.py @@ -0,0 +1,312 @@ +# Copyright (c) Microsoft. All rights reserved. + + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from typing import Annotated, Any + +import numpy as np +from pydantic import BaseModel, Field +from pytest import fixture + +from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) + + +@fixture +def DictVectorStoreRecordCollection(): + class DictVectorStoreRecordCollection(VectorStoreRecordCollection[str, Any]): + inner_storage: dict[str, Any] = Field(default_factory=dict) + + async def _inner_delete(self, keys: Sequence[str], **kwargs: Any) -> None: + for key in keys: + self.inner_storage.pop(key, None) + + async def _inner_get(self, keys: Sequence[str], **kwargs: Any) -> Any | Sequence[Any] | None: + return [self.inner_storage[key] for key in keys if key in self.inner_storage] + + async def _inner_upsert(self, records: Sequence[Any], **kwargs: Any) -> Sequence[str]: + updated_keys = [] + for record in records: + key = ( + record[self._key_field_name] + if isinstance(record, Mapping) + else getattr(record, self._key_field_name) + ) + self.inner_storage[key] = record + updated_keys.append(key) + return updated_keys + + def _deserialize_store_models_to_dicts(self, records: Sequence[Any], **kwargs: Any) -> Sequence[dict[str, Any]]: + return records + + def _serialize_dicts_to_store_models(self, records: Sequence[dict[str, Any]], **kwargs: Any) -> Sequence[Any]: + return records + + async def create_collection(self, **kwargs: Any) -> None: + pass + + async def delete_collection(self, **kwargs: Any) -> None: + self.inner_storage = {} + + async def does_collection_exist(self, **kwargs: Any) -> bool: + return True + + return DictVectorStoreRecordCollection + + +@fixture +def data_model_definition() -> object: + return VectorStoreRecordDefinition( + fields={ + "id": VectorStoreRecordKeyField(), + "content": VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector"), + "vector": VectorStoreRecordVectorField(), + } + ) + + +@fixture +def data_model_serialize_definition() -> object: + def serialize(record, **kwargs): + return record + + def deserialize(records, **kwargs): + return records + + return VectorStoreRecordDefinition( + fields={ + "id": VectorStoreRecordKeyField(), + "content": VectorStoreRecordDataField(), + "vector": VectorStoreRecordVectorField(), + }, + serialize=serialize, + deserialize=deserialize, + ) + + +@fixture +def data_model_to_from_dict_definition() -> object: + def to_dict(record, **kwargs): + return record + + def from_dict(records, **kwargs): + return records + + return VectorStoreRecordDefinition( + fields={ + "id": VectorStoreRecordKeyField(), + "content": VectorStoreRecordDataField(), + "vector": VectorStoreRecordVectorField(), + }, + to_dict=to_dict, + from_dict=from_dict, + ) + + +@fixture +def data_model_container_definition() -> object: + def to_dict(record: dict[str, dict[str, Any]], **kwargs) -> list[dict[str, Any]]: + return [{"id": key} | value for key, value in record.items()] + + def from_dict(records: list[dict[str, Any]], **kwargs) -> dict[str, dict[str, Any]]: + ret = {} + for record in 
records: + id = record.pop("id") + ret[id] = record + return ret + + return VectorStoreRecordDefinition( + fields={ + "id": VectorStoreRecordKeyField(), + "content": VectorStoreRecordDataField(), + "vector": VectorStoreRecordVectorField(), + }, + container_mode=True, + to_dict=to_dict, + from_dict=from_dict, + ) + + +@fixture +def data_model_container_serialize_definition() -> object: + def serialize(record: dict[str, dict[str, Any]], **kwargs) -> list[dict[str, Any]]: + return [{"id": key} | value for key, value in record.items()] + + def deserialize(records: list[dict[str, Any]], **kwargs) -> dict[str, dict[str, Any]]: + ret = {} + for record in records: + id = record.pop("id") + ret[id] = record + return ret + + return VectorStoreRecordDefinition( + fields={ + "id": VectorStoreRecordKeyField(), + "content": VectorStoreRecordDataField(), + "vector": VectorStoreRecordVectorField(), + }, + container_mode=True, + serialize=serialize, + deserialize=deserialize, + ) + + +@fixture +def data_model_pandas_definition() -> object: + from pandas import DataFrame + + return VectorStoreRecordDefinition( + fields={ + "vector": VectorStoreRecordVectorField( + name="vector", + index_kind="hnsw", + dimensions=5, + distance_function="cosine", + property_type="float", + ), + "id": VectorStoreRecordKeyField(name="id"), + "content": VectorStoreRecordDataField( + name="content", + has_embedding=True, + embedding_property_name="vector", + property_type="str", + ), + }, + container_mode=True, + to_dict=lambda x: x.to_dict(orient="records"), + from_dict=lambda x, **_: DataFrame(x), + ) + + +@fixture +def data_model_type_vanilla(): + @vectorstoremodel + class DataModelClass: + def __init__( + self, + content: Annotated[str, VectorStoreRecordDataField()], + vector: Annotated[list[float], VectorStoreRecordVectorField()], + id: Annotated[str, VectorStoreRecordKeyField()], + ): + self.content = content + self.vector = vector + self.id = id + + def __eq__(self, other) -> bool: + return self.content == other.content and self.id == other.id and self.vector == other.vector + + return DataModelClass + + +@fixture +def data_model_type_vector_array(): + @vectorstoremodel + class DataModelClass: + def __init__( + self, + content: Annotated[str, VectorStoreRecordDataField()], + vector: Annotated[ + np.array, + VectorStoreRecordVectorField( + serialize_function=np.ndarray.tolist, + deserialize_function=np.array, + ), + ], + id: Annotated[str, VectorStoreRecordKeyField()], + ): + self.content = content + self.vector = vector + self.id = id + + def __eq__(self, other) -> bool: + return self.content == other.content and self.id == other.id and self.vector == other.vector + + return DataModelClass + + +@fixture +def data_model_type_vanilla_serialize(): + @vectorstoremodel + class DataModelClass: + def __init__( + self, + content: Annotated[str, VectorStoreRecordDataField()], + vector: Annotated[list[float], VectorStoreRecordVectorField()], + id: Annotated[str, VectorStoreRecordKeyField()], + ): + self.content = content + self.vector = vector + self.id = id + + def serialize(self, **kwargs: Any) -> Any: + """Serialize the object to the format required by the data store.""" + return {"id": self.id, "content": self.content, "vector": self.vector} + + @classmethod + def deserialize(cls, obj: Any, **kwargs: Any): + """Deserialize the output of the data store to an object.""" + return cls(**obj) + + def __eq__(self, other) -> bool: + return self.content == other.content and self.id == other.id and self.vector == other.vector + + return 
DataModelClass + + +@fixture +def data_model_type_vanilla_to_from_dict(): + @vectorstoremodel + class DataModelClass: + def __init__( + self, + content: Annotated[str, VectorStoreRecordDataField()], + vector: Annotated[list[float], VectorStoreRecordVectorField()], + id: Annotated[str, VectorStoreRecordKeyField()], + ): + self.content = content + self.vector = vector + self.id = id + + def to_dict(self, **kwargs: Any) -> Any: + """Serialize the object to the format required by the data store.""" + return {"id": self.id, "content": self.content, "vector": self.vector} + + @classmethod + def from_dict(cls, *args: Any, **kwargs: Any): + """Deserialize the output of the data store to an object.""" + return cls(**args[0]) + + def __eq__(self, other) -> bool: + return self.content == other.content and self.id == other.id and self.vector == other.vector + + return DataModelClass + + +@fixture +def data_model_type_pydantic(): + @vectorstoremodel + class DataModelClass(BaseModel): + content: Annotated[str, VectorStoreRecordDataField()] + vector: Annotated[list[float], VectorStoreRecordVectorField()] + id: Annotated[str, VectorStoreRecordKeyField()] + + return DataModelClass + + +@fixture +def data_model_type_dataclass(): + @vectorstoremodel + @dataclass + class DataModelClass: + content: Annotated[str, VectorStoreRecordDataField()] + vector: Annotated[list[float], VectorStoreRecordVectorField()] + id: Annotated[str, VectorStoreRecordKeyField()] + + return DataModelClass diff --git a/python/tests/unit/data/test_vector_store_model_decorator.py b/python/tests/unit/data/test_vector_store_model_decorator.py new file mode 100644 index 000000000000..b690e18dbb78 --- /dev/null +++ b/python/tests/unit/data/test_vector_store_model_decorator.py @@ -0,0 +1,224 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ + +from dataclasses import dataclass +from typing import Annotated + +from pydantic import BaseModel +from pydantic.dataclasses import dataclass as pydantic_dataclass +from pytest import raises + +from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) +from semantic_kernel.exceptions.memory_connector_exceptions import VectorStoreModelException + + +def test_vanilla(): + @vectorstoremodel + class DataModelClass: + def __init__( + self, + content: Annotated[str, VectorStoreRecordDataField()], + content2: Annotated[str, VectorStoreRecordDataField], + vector: Annotated[list[float], VectorStoreRecordVectorField()], + id: Annotated[str, VectorStoreRecordKeyField()], + non_vector_store_content: str | None = None, + optional_content: Annotated[str | None, VectorStoreRecordDataField()] = None, + annotated_content: Annotated[str | None, "description"] = None, + ): + self.content = content + self.content2 = content2 + self.vector = vector + self.id = id + self.optional_content = optional_content + self.non_vector_store_content = non_vector_store_content + self.annotated_content = annotated_content + + assert hasattr(DataModelClass, "__kernel_vectorstoremodel__") + assert hasattr(DataModelClass, "__kernel_vectorstoremodel_definition__") + data_model_definition: VectorStoreRecordDefinition = DataModelClass.__kernel_vectorstoremodel_definition__ + assert len(data_model_definition.fields) == 5 + assert data_model_definition.fields["content"].name == "content" + assert data_model_definition.fields["content"].property_type == "str" + assert data_model_definition.fields["content2"].name == "content2" + assert data_model_definition.fields["content2"].property_type == "str" + assert data_model_definition.fields["vector"].name == "vector" + assert data_model_definition.fields["id"].name == "id" + assert data_model_definition.fields["optional_content"].name == "optional_content" + assert data_model_definition.fields["optional_content"].property_type == "str" + assert data_model_definition.key_field_name == "id" + assert data_model_definition.container_mode is False + assert data_model_definition.vector_field_names == ["vector"] + + +def test_vanilla_2(): + @vectorstoremodel() + class DataModelClass: + def __init__( + self, + content: Annotated[str, VectorStoreRecordDataField()], + id: Annotated[str, VectorStoreRecordKeyField()], + ): + self.content = content + self.id = id + + assert hasattr(DataModelClass, "__kernel_vectorstoremodel__") + assert hasattr(DataModelClass, "__kernel_vectorstoremodel_definition__") + data_model_definition: VectorStoreRecordDefinition = DataModelClass.__kernel_vectorstoremodel_definition__ + assert len(data_model_definition.fields) == 2 + + +def test_dataclass(): + @vectorstoremodel + @dataclass + class DataModelClass: + content: Annotated[str, VectorStoreRecordDataField()] + content2: Annotated[str, VectorStoreRecordDataField] + vector: Annotated[list[float], VectorStoreRecordVectorField()] + id: Annotated[str, VectorStoreRecordKeyField()] + non_vector_store_content: str | None = None + optional_content: Annotated[str | None, VectorStoreRecordDataField()] = None + annotated_content: Annotated[str | None, "description"] = None + + assert hasattr(DataModelClass, "__kernel_vectorstoremodel__") + assert 
hasattr(DataModelClass, "__kernel_vectorstoremodel_definition__") + data_model_definition: VectorStoreRecordDefinition = DataModelClass.__kernel_vectorstoremodel_definition__ + assert len(data_model_definition.fields) == 5 + assert data_model_definition.fields["content"].name == "content" + assert data_model_definition.fields["content"].property_type == "str" + assert data_model_definition.fields["content2"].name == "content2" + assert data_model_definition.fields["content2"].property_type == "str" + assert data_model_definition.fields["vector"].name == "vector" + assert data_model_definition.fields["id"].name == "id" + assert data_model_definition.fields["optional_content"].name == "optional_content" + assert data_model_definition.fields["optional_content"].property_type == "str" + assert data_model_definition.key_field_name == "id" + assert data_model_definition.container_mode is False + assert data_model_definition.vector_field_names == ["vector"] + + +def test_dataclass_inverse_fail(): + with raises(VectorStoreModelException): + + @dataclass + @vectorstoremodel + class DataModelClass: + id: Annotated[str, VectorStoreRecordKeyField()] + content: Annotated[str, VectorStoreRecordDataField()] + + +def test_pydantic_base_model(): + @vectorstoremodel + class DataModelClass(BaseModel): + content: Annotated[str, VectorStoreRecordDataField()] + content2: Annotated[str, VectorStoreRecordDataField] + vector: Annotated[list[float], VectorStoreRecordVectorField()] + id: Annotated[str, VectorStoreRecordKeyField()] + non_vector_store_content: str | None = None + optional_content: Annotated[str | None, VectorStoreRecordDataField()] = None + annotated_content: Annotated[str | None, "description"] = None + + assert hasattr(DataModelClass, "__kernel_vectorstoremodel__") + assert hasattr(DataModelClass, "__kernel_vectorstoremodel_definition__") + data_model_definition: VectorStoreRecordDefinition = DataModelClass.__kernel_vectorstoremodel_definition__ + assert len(data_model_definition.fields) == 5 + assert data_model_definition.fields["content"].name == "content" + assert data_model_definition.fields["content"].property_type == "str" + assert data_model_definition.fields["content2"].name == "content2" + assert data_model_definition.fields["content2"].property_type == "str" + assert data_model_definition.fields["vector"].name == "vector" + assert data_model_definition.fields["id"].name == "id" + assert data_model_definition.fields["optional_content"].name == "optional_content" + assert data_model_definition.fields["optional_content"].property_type == "str" + assert data_model_definition.key_field_name == "id" + assert data_model_definition.container_mode is False + assert data_model_definition.vector_field_names == ["vector"] + + +def test_pydantic_dataclass(): + @vectorstoremodel + @pydantic_dataclass + class DataModelClass: + content: Annotated[str, VectorStoreRecordDataField()] + content2: Annotated[str, VectorStoreRecordDataField] + vector: Annotated[list[float], VectorStoreRecordVectorField()] + id: Annotated[str, VectorStoreRecordKeyField()] + non_vector_store_content: str | None = None + optional_content: Annotated[str | None, VectorStoreRecordDataField()] = None + annotated_content: Annotated[str | None, "description"] = None + + assert hasattr(DataModelClass, "__kernel_vectorstoremodel__") + assert hasattr(DataModelClass, "__kernel_vectorstoremodel_definition__") + data_model_definition: VectorStoreRecordDefinition = DataModelClass.__kernel_vectorstoremodel_definition__ + assert 
len(data_model_definition.fields) == 5 + assert data_model_definition.fields["content"].name == "content" + assert data_model_definition.fields["content"].property_type == "str" + assert data_model_definition.fields["content2"].name == "content2" + assert data_model_definition.fields["content2"].property_type == "str" + assert data_model_definition.fields["vector"].name == "vector" + assert data_model_definition.fields["id"].name == "id" + assert data_model_definition.fields["optional_content"].name == "optional_content" + assert data_model_definition.fields["optional_content"].property_type == "str" + assert data_model_definition.key_field_name == "id" + assert data_model_definition.container_mode is False + assert data_model_definition.vector_field_names == ["vector"] + + +def test_empty_model(): + with raises(VectorStoreModelException): + + @vectorstoremodel + class DataModelClass: + def __init__(self): + pass + + +def test_non_annotated_no_default(): + with raises(VectorStoreModelException): + + @vectorstoremodel + class DataModelClass: + def __init__(self, non_vector_store_content: str): + self.non_vector_store_content = non_vector_store_content + + +def test_annotated_no_vsr_field_no_default(): + with raises(VectorStoreModelException): + + @vectorstoremodel + class DataModelClass: + def __init__( + self, + annotated_content: Annotated[str, "description"], + ): + self.annotated_content = annotated_content + + +def test_non_vector_list_and_dict(): + @vectorstoremodel + @dataclass + class DataModelClass: + key: Annotated[str, VectorStoreRecordKeyField()] + list1: Annotated[list[int], VectorStoreRecordDataField()] + list2: Annotated[list[str], VectorStoreRecordDataField] + dict1: Annotated[dict[str, int], VectorStoreRecordDataField()] + dict2: Annotated[dict[str, str], VectorStoreRecordDataField] + + assert hasattr(DataModelClass, "__kernel_vectorstoremodel__") + assert hasattr(DataModelClass, "__kernel_vectorstoremodel_definition__") + data_model_definition: VectorStoreRecordDefinition = DataModelClass.__kernel_vectorstoremodel_definition__ + assert len(data_model_definition.fields) == 5 + assert data_model_definition.fields["list1"].name == "list1" + assert data_model_definition.fields["list1"].property_type == "list[int]" + assert data_model_definition.fields["list2"].name == "list2" + assert data_model_definition.fields["list2"].property_type == "list[str]" + assert data_model_definition.fields["dict1"].name == "dict1" + assert data_model_definition.fields["dict1"].property_type == "dict" + assert data_model_definition.fields["dict2"].name == "dict2" + assert data_model_definition.fields["dict2"].property_type == "dict" + assert data_model_definition.container_mode is False diff --git a/python/tests/unit/data/test_vector_store_record_collection.py b/python/tests/unit/data/test_vector_store_record_collection.py new file mode 100644 index 000000000000..104205f081e5 --- /dev/null +++ b/python/tests/unit/data/test_vector_store_record_collection.py @@ -0,0 +1,559 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from copy import deepcopy +from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch + +import numpy as np +from pandas import DataFrame +from pytest import fixture, mark, raises + +from semantic_kernel.data.vector_store_record_collection import VectorStoreRecordCollection +from semantic_kernel.exceptions.memory_connector_exceptions import ( + MemoryConnectorException, + VectorStoreModelDeserializationException, + VectorStoreModelSerializationException, + VectorStoreModelValidationError, +) + + +@fixture(scope="function") +def vector_store_record_collection( + DictVectorStoreRecordCollection, + data_model_definition, + data_model_serialize_definition, + data_model_to_from_dict_definition, + data_model_container_definition, + data_model_container_serialize_definition, + data_model_pandas_definition, + data_model_type_vanilla, + data_model_type_vanilla_serialize, + data_model_type_vanilla_to_from_dict, + data_model_type_pydantic, + data_model_type_dataclass, + data_model_type_vector_array, + request, +) -> VectorStoreRecordCollection: + item = request.param if request and hasattr(request, "param") else "definition_basic" + defs = { + "definition_basic": data_model_definition, + "definition_with_serialize": data_model_serialize_definition, + "definition_with_to_from": data_model_to_from_dict_definition, + "definition_container": data_model_container_definition, + "definition_container_serialize": data_model_container_serialize_definition, + "definition_pandas": data_model_pandas_definition, + "type_vanilla": data_model_type_vanilla, + "type_vanilla_with_serialize": data_model_type_vanilla_serialize, + "type_vanilla_with_to_from_dict": data_model_type_vanilla_to_from_dict, + "type_pydantic": data_model_type_pydantic, + "type_dataclass": data_model_type_dataclass, + "type_vector_array": data_model_type_vector_array, + } + if item.endswith("pandas"): + return DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=DataFrame, + data_model_definition=defs[item], + ) + if item.startswith("definition_"): + return DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=defs[item], + ) + return DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=defs[item], + ) + + +def test_init(DictVectorStoreRecordCollection, data_model_definition): + vsrc = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + assert vsrc.collection_name == "test" + assert vsrc.data_model_type is dict + assert vsrc._container_mode is False + assert vsrc.data_model_definition == data_model_definition + assert vsrc._key_field_name == "id" + + +@mark.asyncio +async def test_context_manager(DictVectorStoreRecordCollection, data_model_definition): + DictVectorStoreRecordCollection.close = AsyncMock() + async with DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ): + pass + DictVectorStoreRecordCollection.close.assert_called() + + +@mark.asyncio +@mark.parametrize( + "vector_store_record_collection", + [ + "definition_basic", + "definition_with_serialize", + "definition_with_to_from", + "type_vanilla", + "type_vanilla_with_serialize", + "type_vanilla_with_to_from_dict", + "type_pydantic", + "type_dataclass", + "type_vector_array", + ], + indirect=True, +) +async def test_crud_operations(vector_store_record_collection): + id = "test_id" + record = {"id": id, 
"content": "test_content", "vector": [1.0, 2.0, 3.0]} + if vector_store_record_collection.data_model_definition.fields["vector"].deserialize_function is not None: + record["vector"] = vector_store_record_collection.data_model_definition.fields["vector"].deserialize_function( + record["vector"] + ) + if vector_store_record_collection.data_model_type is not dict: + model = vector_store_record_collection.data_model_type + record = model(**record) + no_records = await vector_store_record_collection.get(id) + assert no_records is None + await vector_store_record_collection.upsert(record) + assert len(vector_store_record_collection.inner_storage) == 1 + if vector_store_record_collection.data_model_type is dict: + assert vector_store_record_collection.inner_storage[id] == record + else: + assert vector_store_record_collection.inner_storage[id]["content"] == record.content + record_2 = await vector_store_record_collection.get(id) + if vector_store_record_collection.data_model_type is dict: + assert record_2 == record + else: + if isinstance(record.vector, list): + assert record_2 == record + else: + assert record_2.id == record.id + assert record_2.content == record.content + assert np.array_equal(record_2.vector, record.vector) + await vector_store_record_collection.delete(id) + assert len(vector_store_record_collection.inner_storage) == 0 + + +@mark.asyncio +@mark.parametrize( + "vector_store_record_collection", + [ + "definition_basic", + "definition_with_serialize", + "definition_with_to_from", + "type_vanilla", + "type_vanilla_with_serialize", + "type_vanilla_with_to_from_dict", + "type_pydantic", + "type_dataclass", + ], + indirect=True, +) +async def test_crud_batch_operations(vector_store_record_collection): + ids = ["test_id_1", "test_id_2"] + batch = [ + {"id": ids[0], "content": "test_content", "vector": [1.0, 2.0, 3.0]}, + {"id": ids[1], "content": "test_content", "vector": [1.0, 2.0, 3.0]}, + ] + if vector_store_record_collection.data_model_type is not dict: + model = vector_store_record_collection.data_model_type + batch = [model(**record) for record in batch] + no_records = await vector_store_record_collection.get_batch(ids) + assert no_records is None + await vector_store_record_collection.upsert_batch(batch) + assert len(vector_store_record_collection.inner_storage) == 2 + if vector_store_record_collection.data_model_type is dict: + assert vector_store_record_collection.inner_storage[ids[0]] == batch[0] + else: + assert vector_store_record_collection.inner_storage[ids[0]]["content"] == batch[0].content + records = await vector_store_record_collection.get_batch(ids) + assert records == batch + await vector_store_record_collection.delete_batch(ids) + assert len(vector_store_record_collection.inner_storage) == 0 + + +@mark.asyncio +@mark.parametrize( + "vector_store_record_collection", + ["definition_container", "definition_container_serialize"], + indirect=True, +) +async def test_crud_operations_container(vector_store_record_collection): + id = "test_id" + record = {id: {"content": "test_content", "vector": [1.0, 2.0, 3.0]}} + no_records = await vector_store_record_collection.get(id) + assert no_records is None + await vector_store_record_collection.upsert(record) + assert len(vector_store_record_collection.inner_storage) == 1 + assert vector_store_record_collection.inner_storage[id]["content"] == record[id]["content"] + assert vector_store_record_collection.inner_storage[id]["vector"] == record[id]["vector"] + record_2 = await vector_store_record_collection.get(id) + assert 
record_2 == record + await vector_store_record_collection.delete(id) + assert len(vector_store_record_collection.inner_storage) == 0 + + +@mark.asyncio +@mark.parametrize( + "vector_store_record_collection", + ["definition_container", "definition_container_serialize"], + indirect=True, +) +async def test_crud_batch_operations_container(vector_store_record_collection): + ids = ["test_id_1", "test_id_2"] + batch = { + ids[0]: {"content": "test_content", "vector": [1.0, 2.0, 3.0]}, + ids[1]: {"content": "test_content", "vector": [1.0, 2.0, 3.0]}, + } + no_records = await vector_store_record_collection.get_batch(ids) + assert no_records is None + await vector_store_record_collection.upsert_batch(batch) + assert len(vector_store_record_collection.inner_storage) == 2 + assert vector_store_record_collection.inner_storage[ids[0]]["content"] == batch[ids[0]]["content"] + assert vector_store_record_collection.inner_storage[ids[0]]["vector"] == batch[ids[0]]["vector"] + records = await vector_store_record_collection.get_batch(ids) + assert records == batch + await vector_store_record_collection.delete_batch(ids) + assert len(vector_store_record_collection.inner_storage) == 0 + + +@mark.asyncio +@mark.parametrize( + "vector_store_record_collection", + ["definition_pandas"], + indirect=True, +) +async def test_crud_operations_pandas(vector_store_record_collection): + id = "test_id" + record = DataFrame([{"id": id, "content": "test_content", "vector": [1.0, 2.0, 3.0]}]) + no_records = await vector_store_record_collection.get(id) + assert no_records is None + await vector_store_record_collection.upsert(record) + assert len(vector_store_record_collection.inner_storage) == 1 + + assert vector_store_record_collection.inner_storage[id]["content"] == record["content"].values[0] + assert vector_store_record_collection.inner_storage[id]["vector"] == record["vector"].values[0] + record_2 = await vector_store_record_collection.get(id) + assert record_2.equals(record) + await vector_store_record_collection.delete(id) + assert len(vector_store_record_collection.inner_storage) == 0 + + +@mark.asyncio +@mark.parametrize( + "vector_store_record_collection", + ["definition_pandas"], + indirect=True, +) +async def test_crud_batch_operations_pandas(vector_store_record_collection): + ids = ["test_id_1", "test_id_2"] + + batch = DataFrame([{"id": id, "content": "test_content", "vector": [1.0, 2.0, 3.0]} for id in ids]) + no_records = await vector_store_record_collection.get_batch(ids) + assert no_records is None + await vector_store_record_collection.upsert_batch(batch) + assert len(vector_store_record_collection.inner_storage) == 2 + assert vector_store_record_collection.inner_storage[ids[0]]["content"] == batch["content"].values[0] + assert vector_store_record_collection.inner_storage[ids[0]]["vector"] == batch["vector"].values[0] + records = await vector_store_record_collection.get_batch(ids) + assert records.equals(batch) + await vector_store_record_collection.delete_batch(ids) + assert len(vector_store_record_collection.inner_storage) == 0 + + +@mark.asyncio +async def test_upsert_fail(DictVectorStoreRecordCollection, data_model_definition): + DictVectorStoreRecordCollection._inner_upsert = MagicMock(side_effect=Exception) + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + record = {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]} + with raises(MemoryConnectorException, 
match="Error upserting record:"): + await vector_store_record_collection.upsert(record) + with raises(MemoryConnectorException, match="Error upserting records:"): + await vector_store_record_collection.upsert_batch([record]) + assert len(vector_store_record_collection.inner_storage) == 0 + + +@mark.asyncio +async def test_get_fail(DictVectorStoreRecordCollection, data_model_definition): + DictVectorStoreRecordCollection._inner_get = MagicMock(side_effect=Exception) + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + record = {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]} + await vector_store_record_collection.upsert(record) + assert len(vector_store_record_collection.inner_storage) == 1 + with raises(MemoryConnectorException, match="Error getting record:"): + await vector_store_record_collection.get("test_id") + with raises(MemoryConnectorException, match="Error getting records:"): + await vector_store_record_collection.get_batch(["test_id"]) + + +@mark.asyncio +async def test_get_fail_multiple(DictVectorStoreRecordCollection, data_model_definition): + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + record = {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]} + await vector_store_record_collection.upsert(record) + assert len(vector_store_record_collection.inner_storage) == 1 + with ( + patch( + "semantic_kernel.data.vector_store_record_collection.VectorStoreRecordCollection.deserialize" + ) as deserialize_mock, + raises(MemoryConnectorException, match="Error deserializing record, multiple records returned:"), + ): + deserialize_mock.return_value = [ + {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]}, + {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]}, + ] + await vector_store_record_collection.get("test_id") + + +@mark.asyncio +async def test_serialize_fail(DictVectorStoreRecordCollection, data_model_definition): + DictVectorStoreRecordCollection.serialize = MagicMock(side_effect=Exception) + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + record = {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]} + with raises(MemoryConnectorException, match="Error serializing record"): + await vector_store_record_collection.upsert(record) + with raises(MemoryConnectorException, match="Error serializing record"): + await vector_store_record_collection.upsert_batch([record]) + + +@mark.asyncio +async def test_deserialize_fail(DictVectorStoreRecordCollection, data_model_definition): + DictVectorStoreRecordCollection.deserialize = MagicMock(side_effect=Exception) + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + record = {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]} + vector_store_record_collection.inner_storage["test_id"] = record + with raises(MemoryConnectorException, match="Error deserializing record"): + await vector_store_record_collection.get("test_id") + with raises(MemoryConnectorException, match="Error deserializing record"): + await vector_store_record_collection.get_batch(["test_id"]) + 
+ +def test_serialize_custom_fail(DictVectorStoreRecordCollection, data_model_type_vanilla_serialize): + data_model_type_vanilla_serialize.serialize = MagicMock(side_effect=Exception) + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=data_model_type_vanilla_serialize, + ) + record = data_model_type_vanilla_serialize( + content="test_content", + vector=[1.0, 2.0, 3.0], + id="test_id", + ) + with raises(VectorStoreModelSerializationException, match="Error serializing record:"): + vector_store_record_collection.serialize(record) + + +def test_deserialize_custom_fail(DictVectorStoreRecordCollection, data_model_type_vanilla_serialize): + data_model_type_vanilla_serialize.deserialize = MagicMock(side_effect=Exception) + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=data_model_type_vanilla_serialize, + ) + record = {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]} + with raises(VectorStoreModelSerializationException, match="Error deserializing record:"): + vector_store_record_collection.deserialize(record) + + +def test_serialize_data_model_to_dict_fail_mapping(DictVectorStoreRecordCollection, data_model_definition): + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + record = {"content": "test_content", "vector": [1.0, 2.0, 3.0]} + with raises(VectorStoreModelSerializationException, match="Error serializing record"): + vector_store_record_collection._serialize_data_model_to_dict(record) + + +def test_serialize_data_model_to_dict_fail_object(DictVectorStoreRecordCollection, data_model_type_vanilla): + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=data_model_type_vanilla, + ) + record = Mock(spec=data_model_type_vanilla) + with raises(VectorStoreModelSerializationException, match="Error serializing record"): + vector_store_record_collection._serialize_data_model_to_dict(record) + + +def test_deserialize_dict_data_model_fail_sequence(DictVectorStoreRecordCollection, data_model_type_vanilla): + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=data_model_type_vanilla, + ) + with raises(VectorStoreModelDeserializationException, match="Cannot deserialize multiple records"): + vector_store_record_collection._deserialize_dict_to_data_model([{}, {}]) + + +def test_deserialize_dict_data_model_fail(DictVectorStoreRecordCollection, data_model_definition): + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + with raises(VectorStoreModelDeserializationException, match="Error deserializing record"): + vector_store_record_collection._deserialize_dict_to_data_model( + {"content": "test_content", "vector": [1.0, 2.0, 3.0]} + ) + + +def test_deserialize_dict_data_model_shortcut(DictVectorStoreRecordCollection, data_model_definition): + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + record = vector_store_record_collection._deserialize_dict_to_data_model( + [{"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]}] + ) + assert record == {"id": "test_id", "content": "test_content", 
"vector": [1.0, 2.0, 3.0]} + + +@mark.asyncio +@mark.parametrize("vector_store_record_collection", ["type_pydantic"], indirect=True) +async def test_pydantic_fail(vector_store_record_collection): + id = "test_id" + model = deepcopy(vector_store_record_collection.data_model_type) + dict_record = {"id": id, "content": "test_content", "vector": [1.0, 2.0, 3.0]} + record = model(**dict_record) + model.model_dump = MagicMock(side_effect=Exception) + with raises(VectorStoreModelSerializationException, match="Error serializing record:"): + vector_store_record_collection.serialize(record) + with raises(MemoryConnectorException, match="Error serializing record:"): + await vector_store_record_collection.upsert(record) + model.model_validate = MagicMock(side_effect=Exception) + with raises(VectorStoreModelDeserializationException, match="Error deserializing record:"): + vector_store_record_collection.deserialize(dict_record) + + +@mark.parametrize("vector_store_record_collection", ["type_vanilla_with_to_from_dict"], indirect=True) +def test_to_from_dict_fail(vector_store_record_collection): + id = "test_id" + model = deepcopy(vector_store_record_collection.data_model_type) + dict_record = {"id": id, "content": "test_content", "vector": [1.0, 2.0, 3.0]} + record = model(**dict_record) + model.to_dict = MagicMock(side_effect=Exception) + with raises(VectorStoreModelSerializationException, match="Error serializing record:"): + vector_store_record_collection.serialize(record) + model.from_dict = MagicMock(side_effect=Exception) + with raises(VectorStoreModelDeserializationException, match="Error deserializing record:"): + vector_store_record_collection.deserialize(dict_record) + + +@mark.asyncio +async def test_delete_fail(DictVectorStoreRecordCollection, data_model_definition): + DictVectorStoreRecordCollection._inner_delete = MagicMock(side_effect=Exception) + vector_store_record_collection = DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + record = {"id": "test_id", "content": "test_content", "vector": [1.0, 2.0, 3.0]} + await vector_store_record_collection.upsert(record) + assert len(vector_store_record_collection.inner_storage) == 1 + with raises(MemoryConnectorException, match="Error deleting record:"): + await vector_store_record_collection.delete("test_id") + with raises(MemoryConnectorException, match="Error deleting records:"): + await vector_store_record_collection.delete_batch(["test_id"]) + assert len(vector_store_record_collection.inner_storage) == 1 + + +@mark.asyncio +async def test_collection_operations(vector_store_record_collection): + await vector_store_record_collection.create_collection() + assert await vector_store_record_collection.does_collection_exist() + record = {"id": "id", "content": "test_content", "vector": [1.0, 2.0, 3.0]} + await vector_store_record_collection.upsert(record) + assert len(vector_store_record_collection.inner_storage) == 1 + await vector_store_record_collection.delete_collection() + assert vector_store_record_collection.inner_storage == {} + await vector_store_record_collection.create_collection_if_not_exists() + + +@mark.asyncio +async def test_collection_create_if_not_exists(DictVectorStoreRecordCollection, data_model_definition): + DictVectorStoreRecordCollection.does_collection_exist = AsyncMock(return_value=False) + create_mock = AsyncMock() + DictVectorStoreRecordCollection.create_collection = create_mock + vector_store_record_collection = 
DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=dict, + data_model_definition=data_model_definition, + ) + await vector_store_record_collection.create_collection_if_not_exists() + create_mock.assert_called_once() + + +def test_data_model_validation(data_model_type_vanilla, DictVectorStoreRecordCollection): + DictVectorStoreRecordCollection.supported_key_types = PropertyMock(return_value=["str"]) + DictVectorStoreRecordCollection.supported_vector_types = PropertyMock(return_value=["float"]) + DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=data_model_type_vanilla, + ) + + +def test_data_model_validation_key_fail(data_model_type_vanilla, DictVectorStoreRecordCollection): + DictVectorStoreRecordCollection.supported_key_types = PropertyMock(return_value=["int"]) + with raises(VectorStoreModelValidationError, match="Key field must be one of"): + DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=data_model_type_vanilla, + ) + + +def test_data_model_validation_vector_fail(data_model_type_vanilla, DictVectorStoreRecordCollection): + DictVectorStoreRecordCollection.supported_vector_types = PropertyMock(return_value=["list[int]"]) + with raises(VectorStoreModelValidationError, match="Vector field "): + DictVectorStoreRecordCollection( + collection_name="test", + data_model_type=data_model_type_vanilla, + ) + + +@mark.asyncio +async def test_upsert_with_vectorizing(vector_store_record_collection): + record = {"id": "test_id", "content": "test_content"} + record2 = {"id": "test_id", "content": "test_content"} + + async def embedding_func(record, type, definition): + if isinstance(record, list): + for r in record: + r["vector"] = [1.0, 2.0, 3.0] + return record + record["vector"] = [1.0, 2.0, 3.0] + return record + + await vector_store_record_collection.upsert(record, embedding_generation_function=embedding_func) + assert vector_store_record_collection.inner_storage["test_id"]["vector"] == [1.0, 2.0, 3.0] + await vector_store_record_collection.delete("test_id") + assert len(vector_store_record_collection.inner_storage) == 0 + await vector_store_record_collection.upsert_batch([record2], embedding_generation_function=embedding_func) + assert vector_store_record_collection.inner_storage["test_id"]["vector"] == [1.0, 2.0, 3.0] + + +# TODO (eavanvalkenburg): pandas container test diff --git a/python/tests/unit/data/test_vector_store_record_definition.py b/python/tests/unit/data/test_vector_store_record_definition.py new file mode 100644 index 000000000000..da70fe7bef99 --- /dev/null +++ b/python/tests/unit/data/test_vector_store_record_definition.py @@ -0,0 +1,54 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from pytest import raises + +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import VectorStoreRecordDataField, VectorStoreRecordKeyField +from semantic_kernel.exceptions.memory_connector_exceptions import VectorStoreModelException + + +def test_vector_store_record_definition(): + id_field = VectorStoreRecordKeyField() + vsrd = VectorStoreRecordDefinition(fields={"id": id_field}) + assert vsrd.fields == {"id": VectorStoreRecordKeyField(name="id")} + assert vsrd.key_field_name == "id" + assert vsrd.key_field == id_field + assert vsrd.field_names == ["id"] + assert vsrd.vector_field_names == [] + assert vsrd.container_mode is False + assert vsrd.to_dict is None + assert vsrd.from_dict is None + assert vsrd.serialize is None + assert vsrd.deserialize is None + + +def test_no_fields_fail(): + with raises(VectorStoreModelException): + VectorStoreRecordDefinition(fields={}) + + +def test_no_name_fields_fail(): + with raises(VectorStoreModelException): + VectorStoreRecordDefinition(fields={None: VectorStoreRecordKeyField()}) # type: ignore + with raises(VectorStoreModelException): + VectorStoreRecordDefinition(fields={"": VectorStoreRecordKeyField()}) + + +def test_no_key_field_fail(): + with raises(VectorStoreModelException): + VectorStoreRecordDefinition(fields={"content": VectorStoreRecordDataField()}) + + +def test_multiple_key_field_fail(): + with raises(VectorStoreModelException): + VectorStoreRecordDefinition(fields={"key1": VectorStoreRecordKeyField(), "key2": VectorStoreRecordKeyField()}) + + +def test_no_matching_vector_field_fail(): + with raises(VectorStoreModelException): + VectorStoreRecordDefinition( + fields={ + "id": VectorStoreRecordKeyField(), + "content": VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector"), + } + ) diff --git a/python/tests/unit/data/test_vector_store_record_utils.py b/python/tests/unit/data/test_vector_store_record_utils.py new file mode 100644 index 000000000000..01a1d832c0b3 --- /dev/null +++ b/python/tests/unit/data/test_vector_store_record_utils.py @@ -0,0 +1,44 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from unittest.mock import AsyncMock, MagicMock + +from pytest import mark, raises + +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.vector_store_record_fields import ( + VectorStoreRecordDataField, + VectorStoreRecordKeyField, + VectorStoreRecordVectorField, +) +from semantic_kernel.data.vector_store_record_utils import VectorStoreRecordUtils +from semantic_kernel.exceptions.memory_connector_exceptions import VectorStoreModelException +from semantic_kernel.kernel import Kernel + + +@mark.asyncio +async def test_add_vector_to_records(data_model_definition): + kernel = MagicMock(spec=Kernel) + kernel.add_embedding_to_object = AsyncMock() + utils = VectorStoreRecordUtils(kernel) + assert utils is not None + record = {"id": "test_id", "content": "content"} + await utils.add_vector_to_records(record, None, data_model_definition) + kernel.add_embedding_to_object.assert_called_once() + + +@mark.asyncio +async def test_add_vector_wrong_fields(): + data_model = VectorStoreRecordDefinition( + fields={ + "id": VectorStoreRecordKeyField(), + "content": VectorStoreRecordDataField(has_embedding=True, embedding_property_name="id"), + "vector": VectorStoreRecordVectorField(), + } + ) + kernel = MagicMock(spec=Kernel) + kernel.add_embedding_to_object = AsyncMock() + utils = VectorStoreRecordUtils(kernel) + assert utils is not None + record = {"id": "test_id", "content": "content"} + with raises(VectorStoreModelException, match="Embedding field"): + await utils.add_vector_to_records(record, None, data_model)
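
# A minimal usage sketch, assuming a Kernel already configured with an embedding
# service (that setup is not shown). It uses only constructs exercised by the tests
# above: VectorStoreRecordDefinition, the key/data/vector field types, and
# VectorStoreRecordUtils.add_vector_to_records with a plain-dict record, as in
# test_add_vector_to_records.
from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition
from semantic_kernel.data.vector_store_record_fields import (
    VectorStoreRecordDataField,
    VectorStoreRecordKeyField,
    VectorStoreRecordVectorField,
)
from semantic_kernel.data.vector_store_record_utils import VectorStoreRecordUtils
from semantic_kernel.kernel import Kernel

# The "content" field is marked for embedding into the "vector" field, matching the
# field wiring used by the definition fixtures in these tests.
definition = VectorStoreRecordDefinition(
    fields={
        "id": VectorStoreRecordKeyField(),
        "content": VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector"),
        "vector": VectorStoreRecordVectorField(),
    }
)


async def add_embedding(kernel: Kernel) -> dict:
    # Mirrors the call pattern in test_add_vector_to_records: a dict record,
    # None for the data model type (dict-based), and the definition above.
    record = {"id": "test_id", "content": "test_content"}
    await VectorStoreRecordUtils(kernel).add_vector_to_records(record, None, definition)
    # With a real kernel, the "vector" field is expected to be populated by the
    # registered embedding service before the record is upserted.
    return record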