diff --git a/garak/probes/latentinjection.py b/garak/probes/latentinjection.py index 9392b991..faa0dd7d 100644 --- a/garak/probes/latentinjection.py +++ b/garak/probes/latentinjection.py @@ -463,7 +463,7 @@ class LatentInjectionFactSnippetLegal(LatentInjectionFactSnippetEiffel): ] -class LatentJailbreak(Probe, LatentInjectionMixin): +class LatentJailbreak(LatentInjectionMixin, Probe): """Implementation of Latent Jailbreak, using latent prompt injection- style distraction to get model to write hateful/harmful text @@ -547,6 +547,7 @@ class LatentJailbreakMini(LatentJailbreak): "instruction_count": 3, "trigger_count": 4, } + active = True # has a lot of prompts - default to mini version def __init__(self, config_root=_config): super().__init__(config_root)