guardrails-ai · zsimjee · Nov 17, 2023 · Sep 26, 2023 · Oct 17, 2023 · Oct 17, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -20,7 +20,6 @@ jobs:
         strategy:
             matrix:
                 python-version: ['3.8', '3.9', '3.10', '3.11']
-
         steps:
             -   uses: actions/checkout@v2
             -   name: Set up Python ${{ matrix.python-version }}
@@ -51,7 +50,7 @@ jobs:
         strategy:
             matrix:
                 python-version: ['3.8', '3.9', '3.10', '3.11']
-
+                pydantic-version: ['1.10.9', '2.4.2']
         steps:
             -   uses: actions/checkout@v2
             -   name: Set up Python ${{ matrix.python-version }}
@@ -63,7 +62,7 @@ jobs:
                 uses: actions/cache@v3
                 with:
                     path: ~/.cache/pypoetry
-                    key: poetry-cache-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ env.POETRY_VERSION }}
+                    key: poetry-cache-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ matrix.pydantic-version }}
 
             -   name: Install Poetry
                 uses: snok/install-poetry@v1
@@ -72,10 +71,17 @@ jobs:
                 # TODO: fix errors so that we can run `make dev` instead
                 run: |
                     make full
+                    poetry run pip install pydantic==${{ matrix.pydantic-version }}
+
+            -   if: matrix.pydantic-version == '2.4.2'
+                name: Static analysis with pyright (ignoring pydantic v1)
+                run: |
+                    make type-pydantic-v2
 
-            -   name: Static analysis with pyright
+            -   if: matrix.pydantic-version == '1.10.9'
+                name: Static analysis with mypy (ignoring pydantic v2)
                 run: |
-                    make type
+                    make type-pydantic-v1
 
     Pytests:
         runs-on: ubuntu-latest
@@ -85,7 +91,7 @@ jobs:
                 # TODO: fix errors so that we can run both `make dev` and `make full`
                 # dependencies: ['dev', 'full']
                 dependencies: ['full']
-
+                pydantic-version: ['1.10.9', '2.4.2']
         steps:
             -   uses: actions/checkout@v2
             -   name: Set up Python ${{ matrix.python-version }}
@@ -97,15 +103,15 @@ jobs:
                 uses: actions/cache@v3
                 with:
                     path: ~/.cache/pypoetry
-                    key: poetry-cache-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ env.POETRY_VERSION }}
+                    key: poetry-cache-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ matrix.pydantic-version }}
 
             -   name: Install Poetry
                 uses: snok/install-poetry@v1
 
             -   name: Install Dependencies
                 run: |
-                    python -m pip install --upgrade pip
                     make ${{ matrix.dependencies }}
+                    python -m pip install pydantic==${{ matrix.pydantic-version }}
 
             -   name: Run Pytests
                 run: |

diff --git a/Makefile b/Makefile
@@ -8,6 +8,16 @@ autoformat:
 type:
 	poetry run pyright guardrails/
 
+type-pydantic-v1:
+	echo '{"exclude": ["guardrails/utils/pydantic_utils/v2.py"]}' > pyrightconfig.json
+	poetry run pyright guardrails/
+	rm pyrightconfig.json
+
+type-pydantic-v2:
+	echo '{"exclude": ["guardrails/utils/pydantic_utils/v1.py"]}' > pyrightconfig.json
+	poetry run pyright guardrails/
+	rm pyrightconfig.json
+
 lint:
 	poetry run isort -c guardrails/ tests/
 	poetry run black guardrails/ tests/ --check
@@ -44,4 +54,4 @@ all: autoformat type lint docs test
 precommit:
 	# pytest -x -q --no-summary
 	pyright guardrails/
-	make lint
+	make lint
diff --git a/docs/examples/check_for_pii.ipynb b/docs/examples/check_for_pii.ipynb
@@ -0,0 +1,267 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Check whether an LLM response contains PII (Personally Identifiable Information)\n",
+    "\n",
+    "**Using the `PIIFilter` validator**\n",
+    "\n",
+    "This is a simple check that looks for the presence of a few common PII patterns\n",
+    "It is not intended to be a comprehensive check for PII and to be a quick check that can be used to filter out responses that are likely to contain PII. It uses the Microsoft Presidio library to check for PII.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
+      "You can now load the package via spacy.load('en_core_web_lg')\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Install the necessary packages\n",
+    "! pip install presidio-analyzer presidio-anonymizer -q\n",
+    "! python -m spacy download en_core_web_lg -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import the guardrails package\n",
+    "import guardrails as gd\n",
+    "from guardrails.validators import PIIFilter\n",
+    "from rich import print"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/karanacharya/guardrails-ai/guardrails/guardrails/rail.py:115: UserWarning: Prompt must be provided during __call__.\n",
+      "  warnings.warn(\"Prompt must be provided during __call__.\")\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create Guard object with this validator\n",
+    "# One can specify either pre-defined set of PII or SPI (Sensitive Personal Information) entities by passing in the `pii` or `spi` argument respectively.\n",
+    "# It can be passed either durring intialization or later through the metadata argument in parse method.\n",
+    "\n",
+    "# One can also pass in a list of entities supported by Presidio to the `pii_entities` argument.\n",
+    "guard = gd.Guard.from_string(\n",
+    "    validators=[PIIFilter(pii_entities=\"pii\", on_fail=\"fix\")],\n",
+    "    description=\"testmeout\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">My email address is <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">EMAIL_ADDRESS</span><span style=\"color: #000000; text-decoration-color: #000000\">&gt;, and my phone number is &lt;PHONE_NUMBER</span><span style=\"font-weight: bold\">&gt;</span>\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "My email address is \u001b[1m<\u001b[0m\u001b[1;95mEMAIL_ADDRESS\u001b[0m\u001b[39m>, and my phone number is <PHONE_NUMBER\u001b[0m\u001b[1m>\u001b[0m\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Parse the text\n",
+    "text = \"My email address is [email protected], and my phone number is 1234567890\"\n",
+    "output = guard.parse(\n",
+    "    llm_output=text,\n",
+    ")\n",
+    "\n",
+    "# Print the output\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here, both EMAIL_ADDRESS and PHONE_NUMBER are detected as PII.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">My email address is <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">EMAIL_ADDRESS</span><span style=\"font-weight: bold\">&gt;</span>, and my phone number is <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1234567890</span>\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "My email address is \u001b[1m<\u001b[0m\u001b[1;95mEMAIL_ADDRESS\u001b[0m\u001b[1m>\u001b[0m, and my phone number is \u001b[1;36m1234567890\u001b[0m\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Let's test with passing through metadata for the same guard object\n",
+    "# This will take precendence over the entities passed in during initialization\n",
+    "output = guard.parse(\n",
+    "    llm_output=text,\n",
+    "    metadata={\"pii_entities\": [\"EMAIL_ADDRESS\"]},\n",
+    ")\n",
+    "\n",
+    "# Print the output\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As you can see here, only EMAIL_ADDRESS is detected as PII, and the PHONE_NUMBER is not detected as PII.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's try with SPI entities\n",
+    "# Create a new guard object\n",
+    "guard = gd.Guard.from_string(\n",
+    "    validators=[PIIFilter(pii_entities=\"spi\", on_fail=\"fix\")],\n",
+    "    description=\"testmeout\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">My email address is [email protected], and my account number is <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">US_BANK_NUMBER</span><span style=\"font-weight: bold\">&gt;</span>.\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "My email address is [email protected], and my account number is \u001b[1m<\u001b[0m\u001b[1;95mUS_BANK_NUMBER\u001b[0m\u001b[1m>\u001b[0m.\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Parse text\n",
+    "text = \"My email address is [email protected], and my account number is 1234789012367654.\"\n",
+    "\n",
+    "output = guard.parse(\n",
+    "    llm_output=text,\n",
+    ")\n",
+    "\n",
+    "# Print the output\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here, only the US_BANK_NUMBER is detected as PII, as specified in the \"spi\" entities. Refer to the documentation for more information on the \"pii\" and \"spi\" entities. Obviosuly, you can pass in any [Presidio-supported entities](https://microsoft.github.io/presidio/supported_entities/) through the metadata.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">My ITIN is <span style=\"font-weight: bold\">&lt;</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff; font-weight: bold\">US_ITIN</span><span style=\"color: #000000; text-decoration-color: #000000\">&gt; and my driver's license number is &lt;US_DRIVER_LICENSE</span><span style=\"font-weight: bold\">&gt;</span>\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "My ITIN is \u001b[1m<\u001b[0m\u001b[1;95mUS_ITIN\u001b[0m\u001b[39m> and my driver's license number is <US_DRIVER_LICENSE\u001b[0m\u001b[1m>\u001b[0m\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Another example\n",
+    "text = \"My ITIN is 923756789 and my driver's license number is 87651239\"\n",
+    "\n",
+    "output = guard.parse(\n",
+    "    llm_output=text,\n",
+    "    metadata={\"pii_entities\": [\"US_ITIN\", \"US_DRIVER_LICENSE\"]},\n",
+    ")\n",
+    "\n",
+    "# Print the output\n",
+    "print(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### In this way, any PII entity that you want to check for can be passed in through the metadata and masked by Guardrails for your LLM outputs. Of-course, like all other examples, you can integrate this into your own code and workflows through the complete Guard execution.\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "guard-venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/guardrails/utils/parsing_utils.py b/guardrails/utils/parsing_utils.py
@@ -65,7 +65,7 @@ def get_code_block(
 
 def get_template_variables(template: str) -> List[str]:
     if hasattr(Template, "get_identifiers"):
-        return Template(template).get_identifiers()
+        return Template(template).get_identifiers()  # type: ignore
     else:
         d = collections.defaultdict(str)
         Template(template).safe_substitute(d)

diff --git a/guardrails/utils/pydantic_utils/__init__.py b/guardrails/utils/pydantic_utils/__init__.py
@@ -11,7 +11,13 @@
         convert_pydantic_model_to_openai_fn,
     )
 else:
-    raise NotImplementedError(f"Pydantic version {PYDANTIC_VERSION} is not supported.")
+    from .v2 import (
+        ArbitraryModel,
+        add_pydantic_validators_as_guardrails_validators,
+        add_validator,
+        convert_pydantic_model_to_datatype,
+        convert_pydantic_model_to_openai_fn,
+    )
 
 
 __all__ = [