Skip to content

Modular processing with Modules

Instructor provides an addon that lets you implement complex LLM-based processing flows in a modular way. This addon was inspired by the DSPy library for Python (https://github.com/stanfordnlp/dspy).

Key components of a language program:

- Module subclasses - encapsulate processing logic
- Signatures - define the inputs and outputs of the data processed by modules

NOTE: Other concepts from DSPy (optimizer, compiler, evaluator) have not been implemented yet.

A module consists of 3 key parts:

- __construct() - initializes the module, prepares dependencies and sets up submodules
- signature() - defines the inputs and outputs of the data processed by the module
- forward() - contains the processing logic and returns the output data

The Predict class is a special module that uses Instructor's structured processing capabilities to execute inference on the provided inputs and return the output in the requested format.

<?php

use Cognesy\Instructor\Extras\Module\Addons\Predict\Predict;
use Cognesy\Instructor\Extras\Module\Core\Module;
use Cognesy\Instructor\Extras\Module\Signature\Attributes\InputField;
use Cognesy\Instructor\Extras\Module\Signature\Attributes\OutputField;
use Cognesy\Instructor\Extras\Module\Signature\Signature;
use Cognesy\Instructor\Extras\Module\CallData\SignatureData;
use Cognesy\Instructor\Instructor;

// Load Composer's autoloader; the returned loader instance is used below
// to register an additional source directory for the library namespace.
$loader = require 'vendor/autoload.php';
// __DIR__ never ends with a slash (except for the filesystem root), so the
// relative part has to start with '/' - otherwise the path becomes
// '<dir>../../src/', which points at a non-existent directory.
$loader->add('Cognesy\\Instructor\\', __DIR__ . '/../../src/');

// DATA MODEL DECLARATIONS ////////////////////////////////////////////////////////////////

// Signature data for the analysis of a single email: the email text is the
// input field, topic and sentiment are the output fields.
class EmailAnalysis extends SignatureData
{
    // Input: text of the email to analyse.
    #[InputField('content of email')]
    public string $text;

    // Output: topic label of the email.
    #[OutputField('identify most relevant email topic: sales, support, other, spam')]
    public string $topic;

    // Output: sentiment label of the email.
    #[OutputField('one word sentiment: positive, neutral, negative')]
    public string $sentiment;

    // Named constructor: delegates to fromArgs() passing the email text
    // as the 'text' argument.
    public static function for(string $text): static
    {
        $analysis = self::fromArgs(text: $text);

        return $analysis;
    }
}

// Plain counter object holding the number of emails per topic category.
// Every counter defaults to 0 and is publicly writable.
class CategoryCount
{
    public int $sales;
    public int $support;
    public int $spam;
    public int $other;

    public function __construct(
        int $sales = 0,
        int $support = 0,
        int $spam = 0,
        int $other = 0,
    ) {
        $this->sales = $sales;
        $this->support = $support;
        $this->spam = $spam;
        $this->other = $other;
    }
}

// Signature data for the whole statistics run: the directory name is the
// input field, the aggregated counters and ratios are the output fields.
class EmailStats extends SignatureData
{
    // Input: name of the directory whose emails are analysed.
    #[InputField('directory containing emails')]
    public string $directory;

    // Output: total number of emails.
    #[OutputField('number of emails')]
    public int $emails;

    // Output: number of emails categorised as spam.
    #[OutputField('number of spam emails')]
    public int $spam;

    // Output: aggregated sentiment score divided by the number of non-spam emails.
    #[OutputField('average sentiment ratio')]
    public float $sentimentRatio;

    // Output: spam emails divided by all emails.
    #[OutputField('spam ratio')]
    public float $spamRatio;

    // Output: per-category email counters.
    #[OutputField('category counts')]
    public CategoryCount $categories;

    // Named constructor: delegates to fromArgs() passing the directory name
    // as the 'directory' argument.
    public static function for(string $directory): static
    {
        $stats = self::fromArgs(directory: $directory);

        return $stats;
    }
}

// MODULE DECLARATIONS ////////////////////////////////////////////////////////////////////

/**
 * Module returning the raw emails stored under a directory name.
 *
 * The "file system" is an in-memory map injected through the constructor,
 * which keeps the example self-contained.
 */
class ReadEmails extends Module {
    /**
     * @param array<string, string[]> $directoryContents map of directory name => list of raw email strings
     */
    public function __construct(
        private array $directoryContents = []
    ) {}

    /** Signature in string form: one input (directory), one output (emails, a list of strings). */
    public function signature() : string|Signature {
        return 'directory -> emails : string[]';
    }

    /**
     * Returns the emails registered under the given directory name.
     *
     * @return string[]
     * @throws \InvalidArgumentException when the directory is not present in the injected map
     */
    protected function forward(string $directory) : array {
        // Fail with an explicit message instead of an "undefined array key"
        // warning followed by a TypeError caused by returning null.
        return $this->directoryContents[$directory]
            ?? throw new \InvalidArgumentException("Unknown email directory: {$directory}");
    }
}

/**
 * Module splitting a raw email string of the form
 * 'sender: ..., subject: ..., body: ...' into its three fields.
 *
 * Fields are recognised by position (1st, 2nd, 3rd comma-separated part),
 * not by their label.
 */
class ParseEmail extends Module {
    /** Signature in string form: one input (email), three outputs (sender, subject, body). */
    public function signature() : string|Signature {
        return 'email -> sender, subject, body';
    }

    /**
     * Parses the raw email string.
     *
     * Commas are allowed inside the body and colons are allowed inside any
     * value; a comma inside sender or subject is still treated as a field
     * separator.
     *
     * @return array{sender: string, subject: string, body: string}
     * @throws \InvalidArgumentException when the string does not contain three 'label: value' fields
     */
    protected function forward(string $email) : array {
        // Limit to 3 parts so that commas inside the body (the last field)
        // do not truncate it.
        $fields = explode(',', $email, 3);
        if (count($fields) !== 3) {
            throw new \InvalidArgumentException(
                "Malformed email, expected 'sender: ..., subject: ..., body: ...': {$email}"
            );
        }

        $values = [];
        foreach ($fields as $field) {
            // Limit to 2 parts so that colons inside the value
            // (e.g. 'subject: Re: offer') are preserved.
            $pair = explode(':', $field, 2);
            if (count($pair) !== 2) {
                throw new \InvalidArgumentException(
                    "Malformed email field, expected 'label: value': {$field}"
                );
            }
            $values[] = trim($pair[1]);
        }

        [$sender, $subject, $body] = $values;

        return [
            'sender' => $sender,
            'subject' => $subject,
            'body' => $body,
        ];
    }
}

/**
 * Top-level module: reads all emails from a directory, parses each one,
 * analyses its body with the Predict module and aggregates the results
 * into an EmailStats object.
 */
class GetStats extends Module {
    /**
     * @param ReadEmails $readEmails   submodule providing the raw emails of a directory
     * @param ParseEmail $parseEmail   submodule splitting a raw email into sender / subject / body
     * @param Predict    $analyseEmail submodule producing topic and sentiment for an email body
     */
    public function __construct(
        private ReadEmails $readEmails,
        private ParseEmail $parseEmail,
        private Predict $analyseEmail,
    ) {}

    /** Signature is given as the EmailStats signature data class. */
    public function signature() : string|Signature {
        return EmailStats::class;
    }

    /**
     * Computes email statistics for the given directory.
     *
     * Spam emails are counted but excluded from the sentiment score.
     * Both ratios are 0.0 when their denominator would be zero (no emails
     * at all, or nothing but spam).
     */
    protected function forward(string $directory) : EmailStats {
        $emails = $this->readEmails->withArgs(directory: $directory)->get('emails');
        $emailCount = count($emails);
        $aggregateSentiment = 0;
        $categories = new CategoryCount;
        foreach ($emails as $email) {
            $parsedEmail = $this->parseEmail->withArgs(email: $email);
            $emailData = EmailAnalysis::for(text: $parsedEmail->get('body'));
            $emailAnalysis = $this->analyseEmail->with($emailData);
            // Labels come from model inference, so normalise case and
            // whitespace before comparing them with the expected values.
            $topic = strtolower(trim((string) $emailAnalysis->get('topic')));
            $sentiment = strtolower(trim((string) $emailAnalysis->get('sentiment')));
            // Anything outside the known categories is counted as 'other';
            // this also guarantees $topic names an existing counter property.
            if (!in_array($topic, ['sales', 'support', 'spam'], true)) {
                $topic = 'other';
            }
            $categories->$topic++;
            if ($topic === 'spam') {
                continue;
            }
            // Unknown sentiment labels are scored as neutral instead of
            // aborting the whole run with an UnhandledMatchError.
            $aggregateSentiment += match($sentiment) {
                'positive' => 1,
                'negative' => -1,
                default => 0,
            };
        }
        // Guard both divisions: an empty directory or an all-spam directory
        // would otherwise raise a DivisionByZeroError.
        $nonSpamCount = $emailCount - $categories->spam;
        $spamRatio = ($emailCount > 0) ? $categories->spam / $emailCount : 0.0;
        $sentimentRatio = ($nonSpamCount > 0) ? $aggregateSentiment / $nonSpamCount : 0.0;

        $result = new EmailStats;
        $result->emails = $emailCount;
        $result->spam = $categories->spam;
        $result->sentimentRatio = $sentimentRatio;
        $result->spamRatio = $spamRatio;
        $result->categories = $categories;
        return $result;
    }
}

// SAMPLE DATA: in-memory "directories" of raw emails, keyed by directory name

$directoryContents = [
    'inbox' => [
        'sender: jl@gmail.com, subject: Offer, body: I am happy about the discount you offered and accept contract renewal',
        'sender: xxx, subject: Free!!!, body: Get Ozempic for free',
        'sender: joe@wp.pl, subject: Problem, body: My internet connection keeps failing',
        'sender: paul@x.io, subject: Still no pricing, body: How long do I have to wait for the pricing of custom support service?!?',
        'sender: joe@wp.pl, subject: Slow connection, body: 2 weeks of waiting and still no improvement of my connection',
    ],
];

// WIRE UP THE MODULES: each submodule is created first, then injected into GetStats

$instructor = new Instructor();
$readEmails = new ReadEmails($directoryContents);
$parseEmail = new ParseEmail();
$analyseEmail = new Predict(
    signature: EmailAnalysis::class,
    instructor: $instructor,
);
$getStats = new GetStats(
    readEmails: $readEmails,
    parseEmail: $parseEmail,
    analyseEmail: $analyseEmail,
);

// RUN THE PROGRAM for the 'inbox' directory and print the outcome

$emailStats = $getStats->with(EmailStats::for(directory: 'inbox'));

echo "Results:\n";
dump($emailStats->get());
?>