diff --git a/guardrails_api/blueprints/guards.py b/guardrails_api/blueprints/guards.py
index dea5083..1c1115e 100644
--- a/guardrails_api/blueprints/guards.py
+++ b/guardrails_api/blueprints/guards.py
@@ -16,7 +16,7 @@
 from guardrails_api.clients.postgres_client import postgres_is_enabled
 from guardrails_api.utils.handle_error import handle_error
 from guardrails_api.utils.get_llm_callable import get_llm_callable
-
+from guardrails_api.utils.openai import outcome_to_chat_completion, outcome_to_stream_response
 
 guards_bp = Blueprint("guards", __name__, url_prefix="/guards")
 
@@ -151,6 +151,87 @@ def collect_telemetry(
     validate_span.set_attribute("num_of_reasks", num_of_reasks)
 
 
+@guards_bp.route("/<guard_name>/openai/v1/chat/completions", methods=["POST"])
+@handle_error
+def openai_v1_chat_completions(guard_name: str):
+    # This endpoint implements the OpenAI Chat Completions API.
+    # It is meant to be fully compatible; the only difference is that it uses
+    # the Guard API under the hood instead of the OpenAI API and supports
+    # guardrails error handling.
+    # To use this with the OpenAI SDK you can use the following code:
+    # import openai
+    # openai.base_url = "http://localhost:8000/guards/<guard_name>/openai/v1/"
+    # response = openai.chat.completions.create(
+    #     model="gpt-3.5-turbo-0125",
+    #     messages=[
+    #         {"role": "user", "content": "Hello, how are you?"},
+    #     ],
+    #     stream=True,
+    # )
+    # print(response)
+    # To configure guardrails error handling from the server side you can use the following code:
+    #
+
+    payload = request.json
+    decoded_guard_name = unquote_plus(guard_name)
+    guard_struct = guard_client.get_guard(decoded_guard_name)
+    guard = guard_struct
+    if not isinstance(guard_struct, Guard):
+        guard: Guard = Guard.from_dict(guard_struct.to_dict())
+    stream = payload.get("stream", False)
+    has_tool_gd_tool_call = False
+
+    try:
+        # only keep the gd_response_tool; its presence decides how the output is mapped back
+        tools = payload.get("tools", [])
+        tools = [tool for tool in tools if tool["function"]["name"] == "gd_response_tool"]
+        has_tool_gd_tool_call = len(tools) > 0
+    except (KeyError, AttributeError):
+        pass
+
+    if not stream:
+        try:
+            validation_outcome: ValidationOutcome = guard(
+                # TODO: make this come from the guard struct?
+                # currently we don't support .configure
+                num_reasks=0,
+                **payload,
+            )
+            llm_response = guard.history[-1].iterations[-1].outputs.llm_response_info
+            result = outcome_to_chat_completion(
+                validation_outcome=validation_outcome,
+                llm_response=llm_response,
+                has_tool_gd_tool_call=has_tool_gd_tool_call,
+            )
+            return result
+        except Exception as e:
+            raise HttpError(
+                status=400,
+                message="BadRequest",
+                cause=(str(e)),
+            )
+
+    else:
+        # need to return validated chunks that look identical to OpenAI's;
+        # should look something like:
+        # data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
+        # ....
+        # data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
+        def openai_streamer():
+            guard_stream = guard(
+                num_reasks=0,
+                **payload,
+            )
+            for result in guard_stream:
+                chunk_string = f"data: {json.dumps(outcome_to_stream_response(validation_outcome=result))}\n\n"
+                yield chunk_string.encode("utf-8")
+            # close the stream
+            yield b"\n"
+
+        return Response(
+            stream_with_context(openai_streamer()),
+        )
+
+
 @guards_bp.route("/<guard_name>/validate", methods=["POST"])
 @handle_error
 def validate(guard_name: str):
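For reference, a minimal non-streaming client sketch matching the usage comment in the endpoint above, assuming the API runs on localhost:8000 and a guard named "my-guard" exists (both placeholders); it uses the module-level client of the OpenAI Python SDK (1.x):

    import openai

    openai.base_url = "http://localhost:8000/guards/my-guard/openai/v1/"
    # assumption: how the upstream LLM key is supplied depends on the deployment;
    # the SDK itself only requires that some api_key is set
    openai.api_key = "placeholder-key"

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
    )
    print(response)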
+ # data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1694268190,"model":"gpt-3.5-turbo-0125", "system_fingerprint": "fp_44709d6fcb", "choices":[{"index":0,"delta":{},"logprobs":None,"finish_reason":"stop"}]} + def openai_streamer(): + guard_stream = guard( + num_reasks=0, + **payload, + ) + for result in guard_stream: + chunk_string = f"data: {json.dumps(outcome_to_stream_response(validation_outcome=result))}\n\n" + yield chunk_string.encode("utf-8") + # close the stream + yield b"\n" + + return Response( + stream_with_context(openai_streamer()), + ) + + @guards_bp.route("//validate", methods=["POST"]) @handle_error def validate(guard_name: str): diff --git a/guardrails_api/start-dev.sh b/guardrails_api/start-dev.sh index 1b53bfb..a27f2d3 100755 --- a/guardrails_api/start-dev.sh +++ b/guardrails_api/start-dev.sh @@ -1 +1 @@ -gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload +gunicorn --bind 0.0.0.0:8000 --timeout=5 --threads=10 "guardrails_api.app:create_app()" --reload --capture-output --enable-stdio-inheritance diff --git a/guardrails_api/utils/openai.py b/guardrails_api/utils/openai.py new file mode 100644 index 0000000..10ecfa2 --- /dev/null +++ b/guardrails_api/utils/openai.py @@ -0,0 +1,61 @@ +from guardrails.classes import ValidationOutcome + +def outcome_to_stream_response(validation_outcome: ValidationOutcome): + stream_chunk_template = { + "choices": [ + { + "delta": { + "content": validation_outcome.validated_output, + }, + } + ], + "guardrails": { + "reask": validation_outcome.reask or None, + "validation_passed": validation_outcome.validation_passed, + "error": validation_outcome.error or None, + }, + } + # does this even make sense with a stream? wed need each chunk as theyre emitted + stream_chunk = stream_chunk_template + stream_chunk["choices"][0]["delta"]["content"] = validation_outcome.validated_output + return stream_chunk + + +def outcome_to_chat_completion( + validation_outcome: ValidationOutcome, + llm_response, + has_tool_gd_tool_call=False, +): + completion_template = ( + {"choices": [{"message": {"content": ""}}]} + if not has_tool_gd_tool_call + else { + "choices": [{"message": {"tool_calls": [{"function": {"arguments": ""}}]}}] + } + ) + completion = getattr(llm_response, "full_raw_llm_output", completion_template) + completion["guardrails"] = { + "reask": validation_outcome.reask or None, + "validation_passed": validation_outcome.validation_passed, + "error": validation_outcome.error or None, + } + + # string completion + try: + completion["choices"][0]["message"]["content"] = ( + validation_outcome.validated_output + ) + except KeyError: + pass + + # tool completion + try: + choice = completion["choices"][0] + # if this is accessible it means a tool was called so set our validated output to that + choice["message"]["tool_calls"][-1]["function"]["arguments"] = ( + validation_outcome.validated_output + ) + except KeyError: + pass + + return completion diff --git a/pyproject.toml b/pyproject.toml index c492ed7..b8078d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" keywords = ["Guardrails", "Guardrails AI", "Guardrails API", "Guardrails API"] requires-python = ">= 3.8.1" dependencies = [ - "guardrails-ai>=0.5.0a2", + "guardrails-ai>=0.5.0a11", "flask>=3.0.3,<4", "Flask-SQLAlchemy>=3.1.1,<4", "Flask-Caching>=2.3.0,<3", diff --git a/tests/blueprints/test_guards.py b/tests/blueprints/test_guards.py index d33a93a..1b9e927 100644 --- 
diff --git a/pyproject.toml b/pyproject.toml
index c492ed7..b8078d2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ readme = "README.md"
 keywords = ["Guardrails", "Guardrails AI", "Guardrails API", "Guardrails API"]
 requires-python = ">= 3.8.1"
 dependencies = [
-    "guardrails-ai>=0.5.0a2",
+    "guardrails-ai>=0.5.0a11",
     "flask>=3.0.3,<4",
     "Flask-SQLAlchemy>=3.1.1,<4",
     "Flask-Caching>=2.3.0,<3",
diff --git a/tests/blueprints/test_guards.py b/tests/blueprints/test_guards.py
index d33a93a..1b9e927 100644
--- a/tests/blueprints/test_guards.py
+++ b/tests/blueprints/test_guards.py
@@ -9,7 +9,7 @@
 from tests.mocks.mock_request import MockRequest
 from guardrails.classes import ValidationOutcome
 from guardrails.classes.generic import Stack
-from guardrails.classes.history import Call
+from guardrails.classes.history import Call, Iteration
 from guardrails_api.app import register_config
 
 # TODO: Should we mock this somehow?
@@ -44,10 +44,11 @@ def test_route_setup(mocker):
 
     from guardrails_api.blueprints.guards import guards_bp
 
-    assert guards_bp.route_call_count == 4
+    assert guards_bp.route_call_count == 5
     assert guards_bp.routes == [
         "/",
         "/<guard_name>",
+        "/<guard_name>/openai/v1/chat/completions",
         "/<guard_name>/validate",
         "/<guard_name>/history/<call_id>",
     ]
@@ -546,3 +547,77 @@ def test_validate__call(mocker):
     }
 
     del os.environ["PGHOST"]
+
+def test_openai_v1_chat_completions__call(mocker):
+    from guardrails_api.blueprints.guards import openai_v1_chat_completions
+
+    os.environ["PGHOST"] = "localhost"
+    mock_guard = MockGuardStruct()
+    mock_outcome = ValidationOutcome(
+        call_id="mock-call-id",
+        raw_llm_output="Hello world!",
+        validated_output="Hello world!",
+        validation_passed=False,
+    )
+
+    mock___call__ = mocker.patch.object(MockGuardStruct, "__call__")
+    mock___call__.return_value = mock_outcome
+
+    mock_from_dict = mocker.patch("guardrails_api.blueprints.guards.Guard.from_dict")
+    mock_from_dict.return_value = mock_guard
+
+    mock_request = MockRequest(
+        "POST",
+        json={
+            "messages": [{"role": "user", "content": "Hello world!"}],
+        },
+        headers={"x-openai-api-key": "mock-key"},
+    )
+
+    mocker.patch("flask.Blueprint", new=MockBlueprint)
+    mocker.patch("guardrails_api.blueprints.guards.request", mock_request)
+    mock_get_guard = mocker.patch(
+        "guardrails_api.blueprints.guards.guard_client.get_guard",
+        return_value=mock_guard,
+    )
+    mocker.patch(
+        "guardrails_api.blueprints.guards.get_llm_callable",
+        return_value="openai.Completion.create",
+    )
+
+    mocker.patch("guardrails_api.blueprints.guards.CacheClient.set")
+
+    mock_status = mocker.patch(
+        "guardrails.classes.history.call.Call.status", new_callable=PropertyMock
+    )
+    mock_status.return_value = "fail"
+    mock_call = Call()
+    mock_call.iterations = Stack(Iteration("some-id", 1))
+    mock_guard.history = Stack(mock_call)
+
+    response = openai_v1_chat_completions("My%20Guard's%20Name")
+
+    mock_get_guard.assert_called_once_with("My Guard's Name")
+
+    assert mock___call__.call_count == 1
+
+    mock___call__.assert_called_once_with(
+        num_reasks=0,
+        messages=[{"role": "user", "content": "Hello world!"}],
+    )
+
+    assert response == {
+        "choices": [
+            {
+                "message": {
+                    "content": "Hello world!",
+                },
+            }
+        ],
+        "guardrails": {
+            "reask": None,
+            "validation_passed": False,
+            "error": None,
+        },
+    }
+
+    del os.environ["PGHOST"]
\ No newline at end of file
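The test above only exercises the non-streaming path. For the streaming path, a hedged consumption sketch over plain HTTP, assuming a server on localhost:8000 and a guard named "my-guard" (both placeholders); it parses the SSE "data:" lines produced by openai_streamer and reads the guardrails metadata attached to each chunk:

    import json

    import requests

    url = "http://localhost:8000/guards/my-guard/openai/v1/chat/completions"
    payload = {
        "model": "gpt-3.5-turbo-0125",
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
        "stream": True,
    }

    with requests.post(url, json=payload, stream=True) as resp:
        for line in resp.iter_lines():
            if not line.startswith(b"data: "):
                continue  # skip the blank separator lines between chunks
            chunk = json.loads(line[len(b"data: "):])
            print(chunk["choices"][0]["delta"]["content"],
                  chunk["guardrails"]["validation_passed"])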