function [ Ppred, params, rmse ] = fit_multicycle(time,prod,nk,nseeds) %Find best fitting parameters tmax, pmax, and a for a number of Hubbert cycles % Uses logistic curves as bases. % Requires: prod (time series of production), time (vector of times for % each data), nk (number of curves to fit), and nseeds (number of random % seeds to try for finding global minimum) % Returns: params (nk x 3 matrix of tmax, pmax, and a), RMSE (root mean % square error misfit of multicylce model to data), and Ppred (predicted % production curve). % % Written by James Conder, January 2011 as supplemental material to: % Anderson and Conder, Discussion of multicyclic Hubbert modeling as a % method for forecasting future petroleum production, % Energy and Fuels, 2011 ndata = length(time) ; % number of data s = size(time) ; if s(1) == 1 % force data into column vectors time = time' ; prod = prod' ; end cumprod = cumsum(prod) ; % each curve has three free parameters: pmax, a, & tmax % Pti = 4*pmaxi*exp(-ai*(t-tmaxi))/((1 + exp(-a*(t-tmaxi))^2) Qtot = cumprod(end) ; % total cumulative production of data for iseeds = 1:nseeds tmax = (max(time) - min(time))*rand(1,nk) + min(time) ; tmax = sort(tmax) ; % re-order seeds a = 0.4*rand(1,nk) + 0.25 ; pmax = (2*ones(1,nk) + 0.8*rand(1,nk))*Qtot/nk ; % +/- 40% Qmax = 4*pmax./a ; % optimize fits %disp([iseeds]) [Qpre tmax2 Qinf a2] = fit_logistic2(time,cumprod,tmax,Qmax,a) ; tmax = tmax2' ; a = a2' ; pmax = (Qinf'.*a/4) ; Psum = prod*0 ; % find misfit for i = 1:nk Pi = 4*pmax(i)*exp(-a(i)*(time-tmax(i))) ; Pi = Pi./((1. + exp(-a(i)*(time-tmax(i)))).^2) ; Psum = Psum + Pi ; end chitest = sum((Psum - prod).^2) ; if iseeds == 1 chibest = chitest ; tmaxbest = tmax ; pmaxbest = pmax ; abest = a ; if isnan(chibest) chibest = 1.e6 ; end else if chitest < chibest % keep model with lower misfit chibest = chitest ; tmaxbest = tmax ; pmaxbest = pmax ; abest = a ; end end end tmax = tmaxbest ; % best model found pmax = pmaxbest ; a = abest ; [Y,I] = sort(tmax) ; tmax = tmax(I) ; pmax = pmax(I) ; a = a(I) ; %%% grid search final set of curves in. Optimization for cumulative production is %%% much more stable, but often biases either early or late production as %%% errors in early curves propagate through entire system (unlike straight %%% production) dt = 0.1 ; dp = 0.002 ; da = 0.002 ; for kk = 1:10 % times to loop over all curves for icurve = 1:nk % loop over curves %disp([num2str(tmax(icurve)) ' ' num2str(pmax(icurve)) ' ' num2str(a(icurve))]) Psum = 0*prod ; for i = 1:nk Pi = 4*pmax(i)*exp(-a(i)*(time-tmax(i))) ; Pi = Pi./((1. + exp(-a(i)*(time-tmax(i)))).^2) ; if i ~= icurve Psum = Psum + Pi ; end end tchk = tmax(icurve)-4*dt:dt:tmax(icurve)+4*dt ; % grid search ranges pchk = pmax(icurve)-20*dp:dp:pmax(icurve)+20*dp ; achk = a(icurve)-20*da:da:a(icurve)+20*da ; for ttest = tchk % tmax for ptest = pchk % pmax for atest = achk % a Pi = 4*ptest*exp(-atest*(time-ttest)) ; Pi = Pi./((1. + exp(-atest*(time-ttest))).^2) ; Psumtest = Psum + Pi ; chitest = sum((Psumtest - prod).^2) ; if chitest < chibest %disp(chitest) chibest = chitest ; tmax(icurve) = ttest ; pmax(icurve) = ptest ; a(icurve) = atest ; end end end end end %disp([num2str(tmax(icurve)) ' ' num2str(pmax(icurve)) ' ' num2str(a(icurve))]) end Ppred = 0*prod ; % predicted production for i = 1:nk Pi = 4*pmax(i)*exp(-a(i)*(time-tmax(i))) ; Pi = Pi./((1. + exp(-a(i)*(time-tmax(i)))).^2) ; Ppred = Ppred + Pi ; end rmse = sqrt(chibest/ndata) ; params = [ tmax' pmax' a' ] end function [ Qpre t0 Qinf a] = fit_logistic2(t,Q,tstar,Qinfstar,astar) %fit a logistic function to time series Q(t). Args:t,Q,tstar,Qinfstar,astar % output are Qpre (logistic model fit to data) and % t0, Qinf, and a, the three independant parameters for % logistic: Q(t) = Qinf/(1 + exp(-a*(t-t0))) % Qinf is value as t --> infinity % t0 is symmetric inflection point % a is time decay constant % % tstar, Qinfstar, and astar are initial guesses for parameters % James Conder, November 2010 n = length(t) ; s = size(t) ; if s(1) == 1 % force data into column vector t = t' ; Q = Q' ; end nk = length(tstar) ; % number of curves to fit s = size(tstar) ; iimax = 1000 ; % number of maximum iterations if s(1) == 1 tstar = tstar' ; Qinfstar = Qinfstar' ; astar = astar' ; end % set "best-fitting" parameters to seed values t0 = tstar ; Qinf = Qinfstar ; a = astar ; epsilon = 1 ; % iterate to locally optimal solution ii = 0 ; thresh = 1.e-7 ; % threshold to stop iterating while epsilon > thresh ii = ii + 1 ; Qpre = 0*Q ; for k = 1:nk Qpre = Qpre + Qinf(k)./(1 + exp(-a(k)*(t-t0(k)))) ; % 'predicted' data end d = Q - Qpre ; % data vector (predicted - observed) G = zeros(n,3*nk) ; % dimensionalize partial derivative matrix % set up partial derivative matrix for k = 1:nk ik = (k-1)*3 ; ee = exp(-a(k)*(t-t0(k))) ; eee = 1./((1 + ee).^2) ; G(:,ik+1) = -Qinf(k)*a(k)*(ee.*eee) ; % dd/dt0 G(:,ik+2) = 1./(1 + ee) ; % dd/dQinf G(:,ik+3) = Qinf(k)*(t-t0(k)).*(ee.*eee) ; % dd/da end inan = find(isnan(G)) ; % check for pesky NaNs G(inan) = 0 ; % solve for change to model parameters if isnan(1) dm = (G'*G)\(G'*d) ; % standard least squares else [U,S,V] = svd(G,0) ; % SVD Sinvdiag = 1./diag(S) ; ising = find(Sinvdiag(1)./Sinvdiag < 1.e-12) ; Sinvdiag(ising) = 0 ; Sinv = diag(Sinvdiag) ; dm = 0.1*V*Sinv*U'*d ; end % get new parameters: m = m0 + dm for k = 1:nk ik = (k-1)*3 ; t0(k) = t0(k) + dm(ik+1) ; Qinf(k) = Qinf(k) + dm(ik+2) ; a(k) = a(k) + dm(ik+3) ; end epsilon = norm(dm) ; ddm(ii) = epsilon ; if ii > iimax %disp(['WARNING: max number of itrations reached']) %disp(['normalized epsilon: ' num2str(epsilon/thresh)]) epsilon = thresh ; end end %disp([num2str(ii) ' iterations. epsilon = ' num2str(ddm(ii))]) end