function [ grad, cost ] = unsup_grad(weights,X,params,gradcheck)
% UNSUP_GRAD  Unsupervised cost (sparsity + optional correlation penalty)
% and its gradient for a two-hidden-group factored model, obtained by
% back-propagating through the unrolled mean-field updates.
%
% Inputs
%   weights   struct with fields visfac, hidfac_a, hidfac_b, vishid_a,
%             vishid_b, visbias, hidbias_a, hidbias_b.
%   X         data matrix; columns are data cases (numdata = size(X,2)).
%   params    struct with fields kmf (number of mean-field iterations),
%             pbias_a, plambda_a, pbias_b, plambda_b, numhid_a, numhid_b.
%             Optional fields: optgpu (build the sparsity target with
%             gones instead of ones; treated as false when absent) and
%             kcorr (weight of the A/B correlation penalty; disabled when
%             absent or <= 0).
%   gradcheck optional. When this argument is supplied (whatever its
%             value) the A-then-B update order is always used; otherwise
%             the order is drawn at random on every call.
%
% Outputs
%   grad      struct with fields visfac, hidfac_a, hidfac_b, vishid_a,
%             vishid_b, hidbias_a, hidbias_b (no visbias gradient is
%             returned).
%   cost      struct with fields sp, cor, total (= sp + cor), sp_a, sp_b
%             (sp_a / sp_b are the mean final activations of A / B).
%
% NOTE: sigmoid (and gones on the GPU path) are external helpers. The
% backward pass uses p.*(1-p) as the activation derivative, i.e. it
% assumes sigmoid is the logistic function.
% NOTE: backprop is truncated to the last few mean-field steps (see kstop
% below), so for kmf > 7 the returned gradient is not the exact gradient
% of the cost, even when gradcheck is supplied.

visfac = weights.visfac;
hidfac_a = weights.hidfac_a;
hidfac_b = weights.hidfac_b;
vishid_a = weights.vishid_a;
vishid_b = weights.vishid_b;
visbias = weights.visbias; %#ok<NASGU> % read so a missing field still errors
hidbias_a = weights.hidbias_a;
hidbias_b = weights.hidbias_b;

kmf = params.kmf;
pbias_a = params.pbias_a;
plambda_a = params.plambda_a;
pbias_b = params.pbias_b;
plambda_b = params.plambda_b;

% optgpu is optional: mirror the plain `if params.optgpu` truth test when
% the field exists, and fall back to the CPU path when it does not.
use_gpu = false;
if isfield(params,'optgpu'),
    if params.optgpu,
        use_gpu = true;
    end
end

numdata = size(X,2);

% Input-dependent terms shared by the initialisation and every
% mean-field update; computed once.
vtmp = visfac*X;
vh1x = vishid_a*X;
vh2x = vishid_b*X;

% Activation histories: slice 1 is the feed-forward initialisation,
% slice k+1 the state after mean-field iteration k. Built with 0*repmat
% so they inherit the array type of the biases.
A_history = 0*repmat(hidbias_a,[1,numdata,kmf+1]);
B_history = 0*repmat(hidbias_b,[1,numdata,kmf+1]);

A = sigmoid(bsxfun(@plus,vh1x,hidbias_a));
A_history(:,:,1) = A;
B = sigmoid(bsxfun(@plus,vh2x,hidbias_b));
B_history(:,:,1) = B;

% order==0: update A then B in each iteration; order==1: B then A.
if exist('gradcheck','var'),
    order = 0;
else
    order = rand>0.5;
end

% Mean-field inference.
for k = 1:kmf,
    if order==0,
        A = sigmoid(bsxfun(@plus, vh1x + hidfac_a'*(vtmp.*(hidfac_b*B)), hidbias_a));
        B = sigmoid(bsxfun(@plus, vh2x + hidfac_b'*(vtmp.*(hidfac_a*A)), hidbias_b));
    else
        B = sigmoid(bsxfun(@plus, vh2x + hidfac_b'*(vtmp.*(hidfac_a*A)), hidbias_b));
        A = sigmoid(bsxfun(@plus, vh1x + hidfac_a'*(vtmp.*(hidfac_b*B)), hidbias_a));
    end
    A_history(:,:,k+1) = A;
    B_history(:,:,k+1) = B;
end

% Gradient accumulators (0*W keeps the array type of each weight).
dvisfac = 0*visfac;
dhidfac_a = 0*hidfac_a;
dhidfac_b = 0*hidfac_b;
dvishid_a = 0*vishid_a;
dvishid_b = 0*vishid_b;
dhidbias_a = 0*hidbias_a;
dhidbias_b = 0*hidbias_b;

% A sparsity: KL-style penalty between the target pbias_a and rho, the
% per-data-case mean of the final A activations (mean over rows).
pA = A_history(:,:,end);
rho = mean(pA,1);
if use_gpu,
    target = pbias_a*gones(1,numdata);
else
    target = pbias_a*ones(1,numdata);
end
sp_a = mean(rho);
cost_sp = plambda_a*params.numhid_a*(target*(log(target)-log(rho))' + (1-target)*(log(1-target) - log(1-rho))');
plrho = (-pbias_a./rho + (1-pbias_a)./(1-rho));
% Gradient w.r.t. the pre-activation of the final A.
dA = plambda_a*bsxfun(@times, pA.*(1-pA), plrho);

% B sparsity (same form as for A).
pB = B_history(:,:,end);
rho = mean(pB,1);
if use_gpu,
    target = pbias_b*gones(1,numdata);
else
    target = pbias_b*ones(1,numdata);
end
sp_b = mean(rho);
cost_sp = cost_sp + plambda_b*params.numhid_b*(target*(log(target)-log(rho))' + (1-target)*(log(1-target) - log(1-rho))');
plrho = (-pbias_b./rho + (1-pbias_b)./(1-rho));
dB = plambda_b*bsxfun(@times, pB.*(1-pB), plrho);

% Optional penalty on the squared correlation (over data cases) between
% every final A unit and every final B unit.
cost_cor = 0;
if isfield(params,'kcorr') &&  params.kcorr > 0,
    mnA = mean(A,2);
    mnB = mean(B,2);
    Ap = bsxfun(@minus,A,mnA);
    Bp = bsxfun(@minus,B,mnB);

    num = Ap*Bp';
    denom = sum(Ap.^2,2)*sum(Bp.^2,2)';
    scale = num./sqrt(denom);
    cost_cor = params.kcorr*0.5*sum(sum(scale.^2));

    dA_corr = (scale./sqrt(denom))*Bp - 0.5*bsxfun(@times,(scale.*num.*(denom.^-1.5))*2*sum(Bp.^2,2),Ap);
    dB_corr = (scale'./sqrt(denom'))*Ap - 0.5*bsxfun(@times,(scale'.*num'.*(denom'.^-1.5))*2*sum(Ap.^2,2),Bp);

    % Chain through the activation derivative.
    dA_corr = dA_corr.*(A.*(1-A));
    dB_corr = dB_corr.*(B.*(1-B));

    dA = dA + params.kcorr*dA_corr;
    dB = dB + params.kcorr*dB_corr;
end

cost = struct('sp',cost_sp,'cor',cost_cor,'total',cost_sp+cost_cor,...
              'sp_a',sp_a, 'sp_b',sp_b);

% The first pass below overwrites dB, so keep the top-level B gradient
% for the second pass.
dB_sav = dB;

% Truncated backprop through the mean-field recurrence: k runs from
% kmf+1 down to kstop, i.e. at most 7 iterations; for kmf <= 7 the whole
% chain is traversed (kstop == 2).
kstop = max(2,kmf-5);

% RNN backprop, starting from A.
for k = (kmf+1):-1:kstop,
    % A backprop. The B state that fed this A update is slice k-1 when A
    % is updated first (order==0) and slice k when B is updated first.
    dhidbias_a = dhidbias_a + sum(dA,2);
    btmp = hidfac_b*B_history(:,:,k-1+order);
    dhidfac_a = dhidfac_a + (btmp.*vtmp)*dA';
    dhidfac_b = dhidfac_b + ((hidfac_a*dA).*vtmp)*B_history(:,:,k-1+order)';
    dvisfac = dvisfac + (btmp.*(hidfac_a*dA))*X';
    dvishid_a = dvishid_a + dA*X';

    % B backprop.
    dB = hidfac_b'*(vtmp.*(hidfac_a*dA));
    dB = dB.*B_history(:,:,k-1+order).*(1-B_history(:,:,k-1+order));
    dhidbias_b = dhidbias_b + sum(dB,2);
    dvishid_b = dvishid_b + dB*X';
    % For order==0, slice 1 of B is the feed-forward initialisation and
    % has no factor inputs, so the chain ends here.
    if (k==2)&&(order==0), break; end
    atmp = hidfac_a*A_history(:,:,k-1);
    dhidfac_b = dhidfac_b + (atmp.*vtmp)*dB';
    dhidfac_a = dhidfac_a + ((hidfac_b*dB).*vtmp)*A_history(:,:,k-1)';
    dvisfac = dvisfac + (atmp.*(hidfac_b*dB))*X';

    dA = hidfac_a'*(vtmp.*(hidfac_b*dB));
    dA = dA.*A_history(:,:,k-1).*(1-A_history(:,:,k-1));
end
% Remaining dA reaches the weights only through vishid_a and hidbias_a.
if (order==1) || (kmf==0),
    dhidbias_a = dhidbias_a + sum(dA,2);
    dvishid_a = dvishid_a + dA*X';
end

% Start from the top.
dB = dB_sav;

% RNN backprop, starting from B.
for k = (kmf+1):-1:kstop,
    % B backprop. The A state that fed this B update is slice k when A is
    % updated first (order==0) and slice k-1 when B is updated first.
    dhidbias_b = dhidbias_b + sum(dB,2);
    atmp = hidfac_a*A_history(:,:,k-order);
    dhidfac_b = dhidfac_b + (atmp.*vtmp)*dB';
    dhidfac_a = dhidfac_a + ((hidfac_b*dB).*vtmp)*A_history(:,:,k-order)';
    dvisfac = dvisfac + (atmp.*(hidfac_b*dB))*X';
    dvishid_b = dvishid_b + dB*X';

    % A backprop.
    dA = hidfac_a'*(vtmp.*(hidfac_b*dB));
    dA = dA.*A_history(:,:,k-order).*(1-A_history(:,:,k-order));
    dhidbias_a = dhidbias_a + sum(dA,2);
    dvishid_a = dvishid_a + dA*X';
    % For order==1, slice 1 of A is the feed-forward initialisation and
    % has no factor inputs, so the chain ends here.
    if (k==2)&&(order==1), break; end
    btmp = hidfac_b*B_history(:,:,k-1);
    dhidfac_a = dhidfac_a + (btmp.*vtmp)*dA';
    dhidfac_b = dhidfac_b + ((hidfac_a*dA).*vtmp)*B_history(:,:,k-1)';
    dvisfac = dvisfac + (btmp.*(hidfac_a*dA))*X';

    dB = hidfac_b'*(vtmp.*(hidfac_a*dA));
    dB = dB.*B_history(:,:,k-1).*(1-B_history(:,:,k-1));
end
% Remaining dB reaches the weights only through vishid_b and hidbias_b.
if (order==0) || (kmf==0),
    dhidbias_b = dhidbias_b + sum(dB,2);
    dvishid_b = dvishid_b + dB*X';
end

grad = struct();
grad.visfac = dvisfac;
grad.hidfac_a = dhidfac_a;
grad.hidfac_b = dhidfac_b;
grad.vishid_a = dvishid_a;
grad.vishid_b = dvishid_b;
grad.hidbias_a = dhidbias_a;
grad.hidbias_b = dhidbias_b;
