pub fn perplexity(cross_entropy_loss: f64) -> f64
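Computes perplexity from a cross-entropy loss as exp(cross_entropy_loss); this assumes the loss is measured in nats (i.e. computed with the natural logarithm).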
A lower perplexity indicates a better language model.