function [ cost, delta, acc ] = grad_hid_disc1(hid, lab, pars)
% GRAD_HID_DISC1  Softmax cost/gradient summed over consecutive row groups of HID.
%
%   The rows of HID are split into consecutive groups whose sizes are given
%   by pars.card. Group l is scored by grad_softmax against a target built
%   from oneofc(lab(l), pars.card(l)) replicated across pars.batchsize
%   columns.
%
%   Inputs:
%     hid  - matrix whose rows 1..sum(pars.card) are consumed group by group
%            (assumes it has pars.batchsize columns -- TODO confirm with caller).
%     lab  - one label per group; incremented by 1 before use (presumably
%            zero-based on input -- verify against caller).
%     pars - struct with fields card, batchsize and alpha.
%
%   Outputs:
%     cost  - pars.alpha times the sum of the per-group costs.
%     delta - pars.alpha times the per-group gradients, same size as hid;
%             rows not covered by any group stay zero.
%     acc   - mean of the per-group accuracies.

lab = lab + 1;

ngroups = numel(pars.card);

cost = 0;
acc = 0;
% Preallocate the gradient: it used to be read before it was ever assigned.
delta = zeros(size(hid));

idx = 1;
for l = 1:ngroups
    curidx = idx:(idx + pars.card(l) - 1);
    % Same target column for every item in the batch.
    % NOTE(review): oneofc is assumed to return a one-hot column of length pars.card(l).
    L = repmat(oneofc(lab(l), pars.card(l)), 1, pars.batchsize);
    [ cost_l, delta_l, acc_l ] = grad_softmax(L, hid(curidx, :), pars);

    cost = cost + cost_l;
    % Accumulate instead of overwriting so the division below yields a mean.
    acc = acc + acc_l;
    % Groups occupy disjoint row ranges, so a plain assignment is enough.
    delta(curidx, :) = delta_l;

    idx = idx + pars.card(l);
end

acc = acc / ngroups;

delta = pars.alpha * delta;
cost = pars.alpha * cost;

end

function [ cost, delta, acc ] = grad_softmax(L, P, pars)
% GRAD_SOFTMAX  Column-wise softmax cross-entropy cost, gradient and accuracy.
%
%   Inputs:
%     L    - target matrix, same size as P; the row index of each nonzero
%            entry is taken as the true class of its column (assumes exactly
%            one nonzero per column -- TODO confirm with caller).
%     P    - pre-softmax activations, one column per example.
%     pars - struct; only pars.batchsize is read (used as the normaliser).
%
%   Outputs:
%     cost  - (-1/batchsize) * sum(sum(L .* log(softmax(P)))).
%     delta - (1/batchsize) * (softmax(P) - L).
%     acc   - fraction of columns whose arg-max row equals the target row.

% Subtract the column maximum so exp() cannot overflow.
shifted = bsxfun(@minus, P, max(P, [], 1));
expP = exp(shifted);
% Explicit dimension: a bare sum() would sum across columns for a 1-row P.
colsum = sum(expP, 1);
pred = bsxfun(@rdivide, expP, colsum);

% Log-probabilities via log-sum-exp. log(pred) becomes -Inf when a
% probability underflows to 0, and 0 * -Inf would turn the cost into NaN.
logpred = bsxfun(@minus, shifted, log(colsum));

[~, pred_quantized] = max(pred, [], 1);
[ yvec, ~ ] = find(L);
acc = mean(pred_quantized(:) == yvec(:));

cost = (-1/pars.batchsize) * sum(sum(L .* logpred));
delta = (1/pars.batchsize) * (pred - L);

end

