Skip to content

instance_level

instance_level

Instance-level checklist generators (one checklist per input).

DirectGenerator

Bases: InstanceChecklistGenerator

Generate checklists using a prompt template + structured JSON output.

Can be configured via pipeline presets (built-in methods) or custom prompts.

Parameters:

Name Type Description Default
method_name str

Pipeline preset name (e.g., "tick") or custom name. If a known preset, loads config from PIPELINE_PRESETS.

'custom'
custom_prompt Optional[Union[str, Path]]

Custom prompt template. Pass a Path to load from file, or a str for raw prompt text. Overrides preset template.

None
response_schema Optional[type]

Pydantic model for JSON validation. Default: ChecklistResponse.

None
format_name Optional[str]

Format prompt file name (e.g., "checklist"). Default from preset.

None
max_items int

Maximum checklist items to return.

10
min_items int

Minimum expected items.

2
**kwargs Any

Passed to InstanceChecklistGenerator (model, temperature, etc.)

{}
Source code in autochecklist/generators/instance_level/direct.py
class DirectGenerator(InstanceChecklistGenerator):
    """Generate checklists using a prompt template + structured JSON output.

    Can be configured via pipeline presets (built-in methods) or custom prompts.

    Args:
        method_name: Pipeline preset name (e.g., "tick") or custom name.
            If a known preset, loads config from PIPELINE_PRESETS.
        custom_prompt: Custom prompt template. Pass a Path to load from file,
            or a str for raw prompt text. Overrides preset template.
        response_schema: Pydantic model for JSON validation. Default: ChecklistResponse.
        format_name: Format prompt file name (e.g., "checklist"). Default from preset.
        max_items: Maximum checklist items to return.
        min_items: Minimum expected items.
        **kwargs: Passed to InstanceChecklistGenerator (model, temperature, etc.)

    Raises:
        ValueError: If method_name is not a known preset and no custom_prompt
            is provided (there would be no template to render).
    """

    def __init__(
        self,
        method_name: str = "custom",
        custom_prompt: Optional[Union[str, Path]] = None,
        response_schema: Optional[type] = None,
        format_name: Optional[str] = None,
        max_items: int = 10,
        min_items: int = 2,
        **kwargs: Any,
    ) -> None:
        # Load preset defaults if this is a known method
        from .pipeline_presets import PIPELINE_PRESETS

        # Unknown method names yield an empty preset dict; in that case a
        # custom_prompt is mandatory (enforced in the template-loading branch).
        preset = PIPELINE_PRESETS.get(method_name, {})

        # Apply preset defaults, allowing kwargs to override
        if "temperature" not in kwargs and "temperature" in preset:
            kwargs["temperature"] = preset["temperature"]

        super().__init__(**kwargs)

        self._method_name = method_name
        # NOTE: when the preset defines max_items/min_items, those values win
        # over the constructor arguments.
        self.max_items = preset.get("max_items", max_items)
        self.min_items = preset.get("min_items", min_items)

        is_custom_schema = response_schema is not None
        self._response_schema = response_schema or preset.get(
            "response_schema", ChecklistResponse
        )
        # A caller-supplied schema disables the built-in format prompt unless
        # format_name is passed explicitly — the stock format instructions
        # would describe the wrong JSON shape for a custom schema.
        if format_name is not None:
            self._format_name = format_name
        elif is_custom_schema:
            self._format_name = None
        else:
            self._format_name = preset.get("format_name", "checklist")

        # Load template: explicit custom_prompt wins over the preset template.
        if custom_prompt is not None:
            if isinstance(custom_prompt, Path):
                template_text = custom_prompt.read_text(encoding="utf-8")
            else:
                template_text = custom_prompt
        elif preset:
            template_text = load_template(
                preset["template_dir"], preset["template_name"]
            )
        else:
            raise ValueError(
                f"Unknown method '{method_name}' and no custom_prompt provided"
            )

        self._template = PromptTemplate(template_text)

    @property
    def method_name(self) -> str:
        """Name of the pipeline preset (or custom method) this instance uses."""
        return self._method_name

    @property
    def prompt_text(self) -> str:
        """The raw prompt template text."""
        return self._template.template

    def generate(
        self,
        input: str,
        target: Optional[str] = None,
        reference: Optional[str] = None,
        history: str = "",
        **kwargs: Any,
    ) -> Checklist:
        """Generate checklist from input using template + structured output.

        Automatically detects which placeholders the template needs and passes
        only those. This allows the same class to handle TICK (input only),
        RocketEval (input + reference + history), RLCF-direct
        (input + reference), etc.

        Raises:
            ValueError: If the template declares a {reference} placeholder but
                no reference was supplied.
        """
        # Build format kwargs — only pass placeholders that exist in template.
        # NOTE(review): reads PromptTemplate's private `_placeholders` set;
        # assumed to be the set of field names in the template — confirm
        # against PromptTemplate.
        format_kwargs: dict[str, str] = {"input": input}
        if "target" in self._template._placeholders and target is not None:
            format_kwargs["target"] = target
        # Unlike target, a {reference} placeholder makes the argument mandatory.
        if "reference" in self._template._placeholders:
            if reference is None:
                raise ValueError(
                    f"{self._method_name} requires a reference target."
                )
            format_kwargs["reference"] = reference
        if "history" in self._template._placeholders:
            format_kwargs["history"] = history

        # Load format instructions (skip for custom schemas)
        format_text = load_format(self._format_name) if self._format_name else ""

        # Inject format inline if template has {format_instructions} placeholder,
        # otherwise append after the prompt (default).
        if "format_instructions" in self._template._placeholders:
            format_kwargs["format_instructions"] = format_text
            full_prompt = self._template.format(**format_kwargs)
        else:
            prompt = self._template.format(**format_kwargs)
            full_prompt = prompt + "\n\n" + format_text

        # Call model with structured output
        response_format = to_response_format(
            self._response_schema, self._method_name
        )
        raw = self._call_model(full_prompt, response_format=response_format)

        # Parse structured response
        items = self._parse_structured(raw)

        return Checklist(
            items=items,
            source_method=self.method_name,
            generation_level=self.generation_level,
            input=input,
            metadata={"raw_response": raw},
        )

    def _parse_structured(self, raw: str) -> list[ChecklistItem]:
        """Parse JSON response using Pydantic schema.

        Primary path: json.loads() succeeds (structured output).
        Fallback path: extract_json() extracts JSON from raw text.

        Auto-detects the list field and item fields from the schema,
        supporting both built-in and custom response schemas.
        """
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            data = extract_json(raw)
        validated = self._response_schema.model_validate(data)

        # Find the list field (first List[BaseModel] field)
        item_list = self._get_item_list(validated)

        # Truncate to max_items; min_items is NOT enforced here.
        items: list[ChecklistItem] = []
        for q in item_list[: self.max_items]:
            q_data = q.model_dump() if hasattr(q, "model_dump") else {}
            # Find question text: use 'question' field, or first str field
            question, question_key = self._get_question_text(q, q_data)
            # Defaults apply when the item schema lacks these fields.
            weight = getattr(q, "weight", 100.0)
            category = getattr(q, "category", None)
            # Extra fields → metadata
            known = {question_key, "weight", "category"}
            extra = {k: v for k, v in q_data.items() if k not in known}
            items.append(
                ChecklistItem(
                    question=question,
                    weight=weight,
                    category=category,
                    metadata=extra if extra else {},
                )
            )
        return items

    @staticmethod
    def _get_item_list(validated: Any) -> list:
        """Extract the list of items from a validated response model."""
        # Try 'questions' first (built-in convention)
        if hasattr(validated, "questions"):
            return validated.questions
        # Auto-detect: first list attribute, in field-declaration order.
        for field_name in type(validated).model_fields:
            value = getattr(validated, field_name)
            if isinstance(value, list):
                return value
        raise ValueError(
            f"Cannot find list field in {type(validated).__name__}. "
            "Schema must have a list field (e.g., 'questions', 'items')."
        )

    @staticmethod
    def _get_question_text(item: Any, item_data: dict) -> tuple[str, str]:
        """Extract question text and its field key from an item."""
        # Bare strings are treated as the question itself.
        if isinstance(item, str):
            return item, "question"
        if hasattr(item, "question"):
            return item.question, "question"
        # Fall back to first str field
        for key, value in item_data.items():
            if isinstance(value, str):
                return value, key
        raise ValueError(
            f"Cannot find question text in {type(item).__name__}. "
            "Item must have a 'question' field or at least one str field."
        )

prompt_text property

The raw prompt template text.

generate(input, target=None, reference=None, history='', **kwargs)

Generate checklist from input using template + structured output.

Automatically detects which placeholders the template needs and passes only those. This allows the same class to handle TICK (input only), RocketEval (input + reference + history), RLCF-direct (input + reference), etc.

Source code in autochecklist/generators/instance_level/direct.py
def generate(
    self,
    input: str,
    target: Optional[str] = None,
    reference: Optional[str] = None,
    history: str = "",
    **kwargs: Any,
) -> Checklist:
    """Generate checklist from input using template + structured output.

    Automatically detects which placeholders the template needs and passes
    only those. This allows the same class to handle TICK (input only),
    RocketEval (input + reference + history), RLCF-direct
    (input + reference), etc.

    Raises:
        ValueError: If the template declares a {reference} placeholder but
            no reference was supplied.
    """
    # Build format kwargs — only pass placeholders that exist in template.
    # NOTE(review): reads PromptTemplate's private `_placeholders` set —
    # assumed to hold the template's field names; confirm against PromptTemplate.
    format_kwargs: dict[str, str] = {"input": input}
    if "target" in self._template._placeholders and target is not None:
        format_kwargs["target"] = target
    # Unlike target, a {reference} placeholder makes the argument mandatory.
    if "reference" in self._template._placeholders:
        if reference is None:
            raise ValueError(
                f"{self._method_name} requires a reference target."
            )
        format_kwargs["reference"] = reference
    if "history" in self._template._placeholders:
        format_kwargs["history"] = history

    # Load format instructions (skip for custom schemas)
    format_text = load_format(self._format_name) if self._format_name else ""

    # Inject format inline if template has {format_instructions} placeholder,
    # otherwise append after the prompt (default).
    if "format_instructions" in self._template._placeholders:
        format_kwargs["format_instructions"] = format_text
        full_prompt = self._template.format(**format_kwargs)
    else:
        prompt = self._template.format(**format_kwargs)
        full_prompt = prompt + "\n\n" + format_text

    # Call model with structured output
    response_format = to_response_format(
        self._response_schema, self._method_name
    )
    raw = self._call_model(full_prompt, response_format=response_format)

    # Parse structured response
    items = self._parse_structured(raw)

    return Checklist(
        items=items,
        source_method=self.method_name,
        generation_level=self.generation_level,
        input=input,
        metadata={"raw_response": raw},
    )

ContrastiveGenerator

Bases: DirectGenerator

Generate checklists by comparing candidate responses (RLCF candidate modes).

Extends DirectGenerator with candidate auto-generation. Candidates are generated by smaller models and included in the prompt for contrastive analysis.

Two modes:

- rlcf_candidate: input + reference + candidates
- rlcf_candidates_only: input + candidates (no reference)

Source code in autochecklist/generators/instance_level/contrastive.py
class ContrastiveGenerator(DirectGenerator):
    """Generate checklists by comparing candidate responses (RLCF candidate modes).

    Extends DirectGenerator with candidate auto-generation. Candidates are
    generated by smaller models and included in the prompt for contrastive
    analysis.

    Two modes:
    - rlcf_candidate: input + reference + candidates
    - rlcf_candidates_only: input + candidates (no reference)

    Args:
        candidate_models: Models used to auto-generate candidates. With more
            than one model, one candidate is sampled per model; with exactly
            one, ``num_candidates`` samples are drawn from it.
        num_candidates: Number of samples when a single candidate model is used.
        generate_candidates: Whether to auto-generate candidates when none are
            passed to generate(). Default comes from the pipeline preset
            (falling back to True).
        candidate_provider: Optional separate provider for candidate generation.
        candidate_base_url: Optional base URL for the candidate client.
        candidate_api_key: Optional API key for the candidate client.
        candidate_api_format: Optional API format for the candidate client.
        **kwargs: Passed to DirectGenerator (method_name, model, etc.)
    """

    def __init__(
        self,
        candidate_models: Optional[List[str]] = None,
        num_candidates: int = 4,
        generate_candidates: Optional[bool] = None,
        candidate_provider: Optional[str] = None,
        candidate_base_url: Optional[str] = None,
        candidate_api_key: Optional[str] = None,
        candidate_api_format: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        # Read generate_candidates from preset if not explicitly provided
        from .pipeline_presets import PIPELINE_PRESETS

        preset = PIPELINE_PRESETS.get(self._method_name, {})
        if generate_candidates is None:
            self.generate_candidates = preset.get("generate_candidates", True)
        else:
            self.generate_candidates = generate_candidates
        self.candidate_models = candidate_models
        self.num_candidates = num_candidates
        self._candidate_provider = candidate_provider
        self._candidate_base_url = candidate_base_url
        self._candidate_api_key = candidate_api_key
        self._candidate_api_format = candidate_api_format

    def generate(
        self,
        input: str,
        target: Optional[str] = None,
        reference: Optional[str] = None,
        candidates: Optional[Union[List[str], Dict[str, str]]] = None,
        **kwargs: Any,
    ) -> Checklist:
        """Generate checklist from input + candidates.

        Args:
            input: The instruction/query
            target: Alias for reference (used when reference is None)
            reference: Expert/reference target (optional for candidates_only)
            candidates: Candidate responses. Can be:
                - List[str]: multiple candidates (RLCF or listwise)
                - Dict with "chosen"/"rejected" keys (pairwise CRG)
                - None: auto-generated if candidate_models is set
            **kwargs: Additional arguments

        Raises:
            ValueError: If candidates is None and auto-generation is disabled
                or no candidate_models are configured.
        """
        # Bug fix: `target` is documented as an alias for `reference` but was
        # previously ignored — honor it when reference is not given.
        if reference is None:
            reference = target

        # Get or generate candidates
        if candidates is None:
            if self.generate_candidates and self.candidate_models:
                candidates = self._generate_candidates(input)
            else:
                raise ValueError(
                    f"{self.method_name} requires 'candidates' argument."
                )

        # Delegate to _generate_with_candidates with raw candidates
        checklist = self._generate_with_candidates(
            input=input,
            candidates=candidates,
            reference=reference,
            **kwargs,
        )
        # Store raw candidates and count in metadata
        if isinstance(candidates, dict):
            checklist.metadata["candidates"] = list(candidates.values())
            checklist.metadata["num_candidates"] = 2
        else:
            checklist.metadata["candidates"] = candidates
            checklist.metadata["num_candidates"] = len(candidates)
        return checklist

    def _generate_with_candidates(
        self,
        input: str,
        candidates: Union[List[str], Dict[str, str]],
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> Checklist:
        """Build prompt with candidates and call model.

        Routes candidates to template placeholders based on type and template:
        - Dict → {chosen} + {rejected} placeholders (pairwise CRG)
        - List + {responses} placeholder → numbered Response blocks (listwise)
        - List + {candidates} placeholder → numbered Candidate blocks (RLCF)

        Raises:
            ValueError: On any mismatch between candidate type and the
                template's placeholders, or when a required reference is missing.
        """
        # NOTE(review): reads PromptTemplate's private `_placeholders` set —
        # assumed to hold the template's field names; confirm against PromptTemplate.
        placeholders = self._template._placeholders
        format_kwargs: dict[str, str] = {"input": input}

        # --- Route candidates to placeholders ---
        if isinstance(candidates, dict):
            # Pairwise: dict must have chosen+rejected, template must have those placeholders
            if "candidates" in placeholders:
                raise ValueError(
                    "Template has {candidates} placeholder but received dict candidates. "
                    "Use {chosen}/{rejected} placeholders for pairwise, or pass a list."
                )
            if not {"chosen", "rejected"} <= placeholders:
                raise ValueError(
                    "Template must have {chosen} and {rejected} placeholders for dict candidates."
                )
            if set(candidates.keys()) != {"chosen", "rejected"}:
                raise ValueError(
                    "Dict candidates must have exactly 'chosen' and 'rejected' keys, "
                    f"got: {set(candidates.keys())}"
                )
            format_kwargs["chosen"] = candidates["chosen"]
            format_kwargs["rejected"] = candidates["rejected"]
        else:
            # List candidates
            if "chosen" in placeholders or "rejected" in placeholders:
                raise ValueError(
                    "Template has {chosen}/{rejected} placeholders but received list candidates. "
                    "Pass a dict with 'chosen' and 'rejected' keys instead."
                )
            if "responses" in placeholders:
                format_kwargs["responses"] = self._format_ordered_responses(candidates)
            elif "candidates" in placeholders:
                format_kwargs["candidates"] = self._format_candidates(candidates)
            else:
                raise ValueError(
                    "Template must have {candidates} or {responses} placeholder for list candidates."
                )

        # --- Handle optional placeholders ---
        if "context" in placeholders:
            format_kwargs["context"] = kwargs.pop("context", "")

        if "reference" in placeholders:
            if reference is None:
                raise ValueError(
                    f"{self.method_name} requires a reference target."
                )
            format_kwargs["reference"] = reference

        # Load format instructions (skip for custom schemas)
        format_text = load_format(self._format_name) if self._format_name else ""

        # Inject format inline if template has {format_instructions} placeholder,
        # otherwise append after the prompt (default).
        if "format_instructions" in placeholders:
            format_kwargs["format_instructions"] = format_text
            full_prompt = self._template.format(**format_kwargs)
        else:
            prompt = self._template.format(**format_kwargs)
            full_prompt = prompt + "\n\n" + format_text

        response_format = to_response_format(
            self._response_schema, self._method_name
        )
        raw = self._call_model(full_prompt, response_format=response_format)
        items = self._parse_structured(raw)

        return Checklist(
            items=items,
            source_method=self.method_name,
            generation_level=self.generation_level,
            input=input,
            metadata={"raw_response": raw},
        )

    def _get_candidate_client(self) -> Any:
        """Get client for candidate generation.

        If any candidate_* provider param is set, creates a separate client.
        Otherwise falls back to the main client via _get_or_create_client().
        """
        if any([
            self._candidate_provider,
            self._candidate_base_url,
            self._candidate_api_key,
            self._candidate_api_format,
        ]):
            return get_client(
                provider=self._candidate_provider or self._provider,
                base_url=self._candidate_base_url,
                api_key=self._candidate_api_key,
                model=self.model,
                api_format=self._candidate_api_format,
            )
        return self._get_or_create_client()

    def _generate_candidates(self, input: str) -> List[str]:
        """Generate candidate responses using smaller models.

        With multiple candidate models: one sample per model (temperature 0.7,
        diversity comes from the model mix). With a single model:
        num_candidates samples at a higher temperature (0.9) for diversity.
        """
        client = self._get_candidate_client()

        # Build a (model, temperature) plan, then run one shared request loop.
        if len(self.candidate_models) > 1:
            plan = [(model, 0.7) for model in self.candidate_models]
        else:
            plan = [(self.candidate_models[0], 0.9)] * self.num_candidates

        candidates: List[str] = []
        for model, temperature in plan:
            resp = client.chat_completion(
                model=model,
                messages=[{"role": "user", "content": input}],
                temperature=temperature,
                max_tokens=1024,
            )
            candidates.append(resp["choices"][0]["message"]["content"])
        return candidates

    @staticmethod
    def _format_blocks(label: str, texts: List[str]) -> str:
        """Join texts as numbered '### <label> <i>' markdown blocks (1-based)."""
        return "\n\n".join(
            f"### {label} {i}\n{text}" for i, text in enumerate(texts, 1)
        )

    def _format_ordered_responses(self, responses: List[str]) -> str:
        """Format responses as numbered Response blocks for listwise CRG."""
        return self._format_blocks("Response", responses)

    def _format_candidates(self, candidates: List[str]) -> str:
        """Format candidate responses for prompt injection."""
        return self._format_blocks("Candidate", candidates)

generate(input, target=None, reference=None, candidates=None, **kwargs)

Generate checklist from input + candidates.

Parameters:

Name Type Description Default
input str

The instruction/query

required
target Optional[str]

Alias for reference

None
reference Optional[str]

Expert/reference target (optional for candidates_only)

None
candidates Optional[Union[List[str], Dict[str, str]]]

Candidate responses. Can be:

- List[str]: multiple candidates (RLCF or listwise)
- Dict with "chosen"/"rejected" keys (pairwise CRG)
- None: auto-generated if candidate_models is set

None
**kwargs Any

Additional arguments

{}
Source code in autochecklist/generators/instance_level/contrastive.py
def generate(
    self,
    input: str,
    target: Optional[str] = None,
    reference: Optional[str] = None,
    candidates: Optional[Union[List[str], Dict[str, str]]] = None,
    **kwargs: Any,
) -> Checklist:
    """Generate checklist from input + candidates.

    Args:
        input: The instruction/query
        target: Alias for reference (used when reference is None)
        reference: Expert/reference target (optional for candidates_only)
        candidates: Candidate responses. Can be:
            - List[str]: multiple candidates (RLCF or listwise)
            - Dict with "chosen"/"rejected" keys (pairwise CRG)
            - None: auto-generated if candidate_models is set
        **kwargs: Additional arguments

    Raises:
        ValueError: If candidates is None and auto-generation is disabled
            or no candidate_models are configured.
    """
    # Bug fix: `target` is documented as an alias for `reference` but was
    # previously ignored — honor it when reference is not given.
    if reference is None:
        reference = target

    # Get or generate candidates
    if candidates is None:
        if self.generate_candidates and self.candidate_models:
            candidates = self._generate_candidates(input)
        else:
            raise ValueError(
                f"{self.method_name} requires 'candidates' argument."
            )

    # Delegate to _generate_with_candidates with raw candidates
    checklist = self._generate_with_candidates(
        input=input,
        candidates=candidates,
        reference=reference,
        **kwargs,
    )
    # Store raw candidates and count in metadata
    if isinstance(candidates, dict):
        checklist.metadata["candidates"] = list(candidates.values())
        checklist.metadata["num_candidates"] = 2
    else:
        checklist.metadata["candidates"] = candidates
        checklist.metadata["num_candidates"] = len(candidates)
    return checklist