Validation with LLM
Overview
You can use an LLM's ability to process context semantically to validate a response against natural-language instructions. This lets you implement more complex validation logic that would be difficult (or impossible) to achieve with traditional, code-based validation.
Example
<?php
require 'examples/boot.php';
use Cognesy\Events\Event;
use Cognesy\Instructor\Extras\Scalar\Scalar;
use Cognesy\Instructor\StructuredOutput;
use Cognesy\Instructor\StructuredOutputRuntime;
use Cognesy\Instructor\Validation\Traits\ValidationMixin;
use Cognesy\Instructor\Validation\ValidationResult;
use Cognesy\Polyglot\Inference\LLMProvider;
use Cognesy\Schema\Attributes\Description;
use Cognesy\Utils\Str;
// Data holder for a user's name and free-form details, with an LLM-backed
// validation step that rejects details containing sensitive PII.
class UserDetails
{
    use ValidationMixin;

    // The user's name.
    public string $name;

    // Detail entries; the attribute below describes the expected key=value format.
    #[Description('User details in format: key=value')]
    /** @var string[] */
    public array $details;

    /**
     * Validates this object by checking the details for PII.
     *
     * @return ValidationResult A field error on `details` (carrying the joined
     *                          detail lines) when hasPII() reports true,
     *                          otherwise a valid result.
     */
    public function validate() : ValidationResult {
        // Guard clause: nothing sensitive detected, so the object is valid.
        if (!$this->hasPII()) {
            return ValidationResult::valid();
        }

        return ValidationResult::fieldError(
            field: 'details',
            value: implode("\n", $this->details),
            message: "Details contain sensitive PII (phone numbers, SSNs, financial data) - remove those fields from the response.",
        );
    }

    /**
     * Asks an LLM (via StructuredOutput::using('openai')) whether the details
     * contain a phone number, SSN, credit card, or bank account number.
     *
     * @return bool The boolean answer returned by the LLM call.
     */
    private function hasPII() : bool {
        // All detail entries joined into one newline-separated text block.
        $data = implode("\n", $this->details);

        $inference = StructuredOutput::using('openai');

        // The prompt explicitly lists what counts as PII and what does not.
        $prompt = "Text: {$data}\n\nDoes this text contain a phone number (e.g. +1 123 34 45), SSN (e.g. 123-45-6789), credit card, or bank account number? Answer TRUE only for those specific patterns. Answer FALSE for plain age numbers (e.g. 25), names, or job titles like 'developer'.";

        // Scalar boolean response model named 'hasPII'.
        $answerModel = Scalar::boolean('hasPII', 'True only if the text contains a phone number, SSN, credit card, or bank account number. False for age numbers, names, or job titles.');

        return $inference
            ->with(
                messages: $prompt,
                responseModel: $answerModel,
            )
            ->getBoolean();
    }
}
// Sample input: contains a phone number and an SSN, which validate() should reject.
$text = <<<TEXT
My name is Jason. I am is 25 years old. I am developer.
My phone number is +1 123 34 45 and social security number is 123-45-6789
TEXT;

// Build the runtime step by step: provider, max retries set to 2, then an
// event wiretap that prints every event so Instructor's internal processing
// can be inspected.
$provider = LLMProvider::using('openai');
$runtime = StructuredOutputRuntime::fromProvider($provider);
$runtime = $runtime->withMaxRetries(2);
$runtime = $runtime->wiretap(function (Event $event) {
    return $event->print();
});

// Request a UserDetails object extracted from the sample text.
$structuredOutput = new StructuredOutput($runtime);
$user = $structuredOutput
    ->with(
        messages: $text,
        responseModel: UserDetails::class,
    )
    ->get();

dump($user);

// The SSN from the input must not appear in the extracted details.
assert(!Str::contains(implode("\n", $user->details), '123-45-6789'));
?>