# Run MMLU with classification
python -m wisent tasks mmlu --model meta-llama/Llama-3.1-8B-Instruct --layer 15 --limit 100 --classifier-type logistic

# Run with steering
python -m wisent tasks truthfulqa_mc1 --model meta-llama/Llama-3.1-8B-Instruct --layer 15 --steering-mode --steering-strength 1.5

# Run all benchmarks
python -m wisent tasks --all --model meta-llama/Llama-3.1-8B-Instruct

# List available tasks
python -m wisent tasks --list-tasks

# Cross-benchmark evaluation
python -m wisent tasks --train-task mmlu --eval-task truthfulqa_mc1 --cross-benchmark

agent

Autonomous agent with quality control and steering capabilities.

python -m wisent agent "Write a helpful response about climate change" \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --quality-threshold 0.3 \
  --max-attempts 3 \
  --steering-mode \
  --steering-strength 1.0

multi-steer

Combine multiple steering vectors dynamically at inference time.

python -m wisent multi-steer \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --vector helpfulness.pt:1.0 \
  --vector honesty.pt:0.8 \
  --vector creativity.pt:0.5 \
  --layer 15 \
  --prompt "Tell me about AI safety" \
  --normalize-weights

Contrastive Pair Generation

generate-pairs

Generate synthetic contrastive pairs from a trait description.

python -m wisent generate-pairs \
  --trait "more helpful and detailed responses" \
  --num-pairs 50 \
  --output ./pairs/helpfulness.json \
  --model meta-llama/Llama-3.1-8B-Instruct

generate-pairs-from-task

Generate contrastive pairs from benchmark tasks.

python -m wisent generate-pairs-from-task \
  --task truthfulqa_mc1 \
  --limit 100 \
  --output ./pairs/truthful.json

diagnose-pairs

Analyze and diagnose contrastive pairs for quality issues.

python -m wisent diagnose-pairs \
  --pairs ./pairs/helpfulness.json \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --verbose

Steering Vector Generation

create-steering-vector

Create a steering vector from contrastive pairs.

python -m wisent create-steering-vector \
  --pairs ./pairs/helpfulness.json \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --layer 15 \
  --output ./vectors/helpfulness.pt

generate-vector-from-task

Generate steering vector directly from a benchmark task.

python -m wisent generate-vector-from-task \
  --task truthfulqa_mc1 \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --layer 15 \
  --limit 200 \
  --output ./vectors/truthful.pt

synthetic

End-to-end pipeline: generate contrastive pairs from a trait description and train a steering vector from them.

python -m wisent synthetic \
  --trait "responds with more empathy and understanding" \
  --num-pairs 30 \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --layer 15 \
  --save-pairs ./pairs/empathy.json

diagnose-vectors

Analyze steering vectors for quality and effectiveness.

python -m wisent diagnose-vectors \
  --vector ./vectors/helpfulness.pt \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --verbose

verify-steering

Verify that steering is working as expected.

python -m wisent verify-steering \
  --vector ./vectors/helpfulness.pt \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --layer 15

Activations

get-activations

Extract and save activations from model layers.

python -m wisent get-activations \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --layer 15 \
  --input "Hello, how are you?" \
  --output ./activations/

check-linearity

Check linearity of activations across layers.

python -m wisent check-linearity \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --pairs ./pairs/helpfulness.json

Optimization Commands

optimize

Full Optuna-based optimization for classification, steering, and weights.

python -m wisent optimize \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --task truthfulqa_mc1 \
  --n-trials 50

optimize-classification

Optimize classification parameters (layer, threshold, aggregation).

python -m wisent optimize-classification \
  meta-llama/Llama-3.1-8B-Instruct \
  --limit 500 \
  --optimization-metric f1 \
  --layer-range 10-20 \
  --save-classifiers

optimize-steering

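Optimize steering parameters. The command is split into subcommands (auto, compare-methods, optimize-layer, optimize-strength, personalization, multi-personalization), each shown below.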

# Auto optimization
python -m wisent optimize-steering auto meta-llama/Llama-3.1-8B-Instruct --task truthfulqa_mc1

# Compare steering methods
python -m wisent optimize-steering compare-methods meta-llama/Llama-3.1-8B-Instruct --methods CAA

# Optimize layer
python -m wisent optimize-steering optimize-layer meta-llama/Llama-3.1-8B-Instruct --layer-range 10-20

# Optimize strength
python -m wisent optimize-steering optimize-strength meta-llama/Llama-3.1-8B-Instruct --strength-range 0.5 2.0

# Personalization optimization
python -m wisent optimize-steering personalization meta-llama/Llama-3.1-8B-Instruct \
  --trait "speaks like a friendly assistant" --num-pairs 20

# Multi-trait personalization
python -m wisent optimize-steering multi-personalization meta-llama/Llama-3.1-8B-Instruct \
  --trait "helpful" --trait "concise" --trait "technical"

optimize-sample-size

Find optimal training sample size for classifiers.

python -m wisent optimize-sample-size \
  meta-llama/Llama-3.1-8B-Instruct \
  --task truthfulqa_mc1 \
  --layer 15 \
  --token-aggregation average \
  --sample-sizes 10 20 50 100 200 500

geometry-search

Search for optimal steering using geometric methods.

python -m wisent geometry-search \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --task truthfulqa_mc1

Weight Modification (Abliteration)

modify-weights

Permanently modify model weights using steering vectors (abliteration).

python -m wisent modify-weights \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --vectors ./vectors/refusal.pt \
  --method norm-preserved \
  --strength 1.0 \
  --output ./abliterated-model

optimize-weights

Optimize weight modification parameters with collateral damage monitoring.

python -m wisent optimize-weights \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --task refusal-reduction \
  --eval-task mmlu \
  --max-collateral-damage 0.05 \
  --output ./optimized-abliterated-model

Response Generation & Evaluation

generate-responses

Generate responses with optional steering.

python -m wisent generate-responses \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --prompts ./prompts.json \
  --vector ./vectors/helpfulness.pt \
  --strength 1.5 \
  --output ./responses.json

evaluate-responses

Evaluate generated responses for quality.

python -m wisent evaluate-responses \
  --responses ./responses.json \
  --output ./evaluation-report.json

evaluate-refusal

Evaluate model refusal behavior before/after modification.

python -m wisent evaluate-refusal \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --prompts ./harmful-prompts.json

Unified Goodness Training

train-unified-goodness

Train a single "goodness" steering vector from pooled multi-benchmark data.

python -m wisent train-unified-goodness \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --benchmarks truthfulqa_mc1 mmlu hellaswag \
  --samples-per-benchmark 100 \
  --output ./vectors/unified-goodness.pt

Utility Commands

cluster-benchmarks

Cluster benchmarks by similarity for strategic selection.

python -m wisent cluster-benchmarks \
  --model meta-llama/Llama-3.1-8B-Instruct

inference-config

Manage inference configuration settings.

python -m wisent inference-config --show
python -m wisent inference-config --set max-tokens 512

optimization-cache

Manage the optimization results cache.

python -m wisent optimization-cache --list
python -m wisent optimization-cache --clear

Global Options

These options are available for most commands:

Option	Description
--model	Model name or path (HuggingFace format)
--layer	Layer(s) for activation extraction (e.g., 15, 14-16, 14,15,16)
--device	Device to run on (cuda, cpu, mps)
--verbose	Enable verbose output
--seed	Random seed for reproducibility
--output	Output directory or file path
--limit	Limit number of samples

Quick Reference

Command	Purpose
tasks	Run benchmarks with classification or steering
agent	Autonomous agent with quality control
generate-pairs	Generate synthetic contrastive pairs
synthetic	End-to-end pair generation + vector training
multi-steer	Combine multiple steering vectors
optimize	Full Optuna-based parameter optimization
modify-weights	Permanent weight modification (abliteration)
train-unified-goodness	Train unified goodness vector from multiple benchmarks

Stay in the loop. Never miss out.

Subscribe to our newsletter and unlock Wisent insights.

Contact Careers Privacy Policy Terms of Service