{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module Statistics
( statTheta
, statTask
, bayes
) where
import Irt
import Likelihood
import Types
import Control.Arrow
import Control.Monad
import Control.Monad.ST.Safe
import Data.List
import qualified Data.Vector.Generic as V
import qualified Data.Vector.Unboxed as UV
import Statistics.Sample
import System.Random.MWC
statTheta :: StatisticType -> ContestantsData -> IO Statistic
statTheta Count xs = return . SingleStatistic . map (fromIntegral . V.length . snd) $ xs
statTheta SolvedProp xs = return . SingleStatistic . map (prop . snd) $ xs
where
prop = mean . V.map (ok . snd)
ok True = 1
ok False = 0
statTheta LogLikelihood xs = return . SingleStatistic . map (thetaVSumMap logLikelihood) $ xs
statTheta DLogLikelihood xs = return . ListStatistic ["ELogLikelihood", "DLogLikelihood"] .
map (thetaVSumMap edlogl) $ xs
where
edlogl _ d t = [elog, dlog]
where
lt = logLikelihood False d t
lf = logLikelihood True d t
elog = lt * exp lt + lf * exp lf
dlog = (lt-lf)^(2::Int) * (exp $ lt + lf) / (1+paramC d)^(3::Int)
statTheta FisherSEM xs = return . SingleStatistic . map fish $ xs
where
fish = (1 /) . sqrt . thetaVSumMap inf2
inf2 _r d t = (-paramA d) * (paramA d) * logLikelihood't True d t
* logLikelihood't False d t
statTheta Bootstrap xss = withSystemRandom $ asGenST calc
where
calc g = do
seed <- save g
return $ ListStatistic ["BootstrapSEM", "BootstrapLb", "BootstrapUb"]
. map (bootstrap seed 100000) $ xss
stat :: [Double] -> (Double, Double, Double)
stat xs = (sem, lb, ub)
where
n = length xs
sem = sqrt . varianceUnbiased . UV.fromList $ xs
confidence = 0.95 :: Rational
dist = (n - round (fromIntegral n * confidence)) `div` 2
select = (!! dist)
(lb, ub) = (select &&& (select . reverse)) . sort $ xs
bootstrap seed n xs = [err,lb,ub]
where
(err,lb,ub) = runST $ do
g <- restore seed
v <- bootstrap' g n xs
return $ stat v
bootstrap' g n (t,xs) = replicateM n $ do
xs' <- resample g xs
return $! estTheta t xs'
resample g xs = return . V.map (xs V.!) =<< V.replicateM l (uniformR (0,l-1) g)
where l = V.length xs
statTask :: StatisticType -> TasksData -> IO Statistic
statTask Count xs = return . SingleStatistic . map (fromIntegral . V.length . snd) $ xs
statTask SolvedProp xs = return . SingleStatistic . map (prop . snd) $ xs
where
prop = mean . V.map (ok . snd)
ok True = 1
ok False = 0
statTask LogLikelihood xs = return . SingleStatistic . map (paramVSumMap logLikelihood) $ xs
statTask DLogLikelihood xs = return . ListStatistic ["ELogLikelihood", "DLogLikelihood"] .
map (paramVSumMap edlogl) $ xs
where
edlogl _ d t = [elog, dlog]
where
lt = logLikelihood False d t
lf = logLikelihood True d t
elog = lt * exp lt + lf * exp lf
dlog = (lt-lf)^(2::Int) * (exp $ lt + lf) / (1+paramC d)^(3::Int)
bayes :: Double -> ContestantData -> ((Double, Double), [(Double, Double)])
bayes confidence (theta, tasks) = ((lbound, ubound), distribution)
where
pick (lh, _, dt) = (p, p * dt)
where
p = exp lh
fg t = pick $ thetaVSumMap logL (t,tasks)
maxp = fst . fg $ theta
aintegral = integrate $ evaluate 0.3 (0.1*maxp) (fst thetaBound) (snd thetaBound)
points = evaluate 0.3 (0.002*aintegral) (fst thetaBound) (snd thetaBound)
integral = integrate points
distribution :: [(Double, Double)]
distribution = map (second (/integral)) points
percentile = (1 - confidence) / 2
lbound = bound percentile distribution
ubound = bound percentile (reverse distribution)
evaluate maxx maxv x0 x1 = evaluate' l1 r1 ++ [second fst r1]
where
l1 = (x0, fg x0)
r1 = (x1, fg x1)
split l r x = evaluate' l m ++ evaluate' m r
where m = (x, fg x)
evaluate' l@(xl, (vl, dl)) r@(xr, (vr, dr))
| xr - xl > maxx * 1.01 = split l r (xl + maxx)
| (xr - xl) * (abs (dv - dl) + abs (dv - dr)) > maxv = split l r ((xl + xr) / 2)
| otherwise = [(xl, vl)]
where
dv = (vr - vl) / (xr - xl)
integrate :: [(Double, Double)] -> Double
integrate = integrate' 0
where
integrate' !c ((xl,vl):next@((xr,vr):_)) = integrate' (c + (xr - xl) * (vl + vr) / 2) next
integrate' !c _ = c
bound !c ((xl, vl):next@((xr,vr):_))
| c > v = bound (c-v) next
| otherwise = xl + (xr - xl) * c / v
where
v = abs $ (xr - xl) * (vl + vr) / 2