package kdo.reinforcement;

import java.util.List;
import kdo.domain.IOperator;
import kdo.domain.IProblemState;

/* loaded from: input_file:kdo/reinforcement/TDValueLearning.class */
/**
 * Value-function update rule that adjusts state utilities with a
 * temporal-difference step.
 *
 * <p>The learn-rate strategy used by {@link #update} is read from the
 * {@code learnrateStrategy} field inherited from {@link TemporalDifference}.
 */
public class TDValueLearning extends TemporalDifference implements IValueFunctionUpdate {
    /** Creates an instance using whatever defaults the superclass constructor sets up. */
    public TDValueLearning() {
    }

    /**
     * Creates an instance with an explicit learn-rate strategy.
     *
     * @param iLearnrateStrategy strategy handed to the superclass constructor
     */
    public TDValueLearning(ILearnrateStrategy iLearnrateStrategy) {
        super(iLearnrateStrategy);
    }

    /**
     * Updates utilities after a step to {@code iProblemState}.
     *
     * <p>Two independent updates are performed:
     * <ol>
     *   <li>If {@code iProblemState} is a goal state, its exploration is counted
     *       and its utility becomes the running average of the reinforcements
     *       received over all explorations of that state.</li>
     *   <li>If {@code list} holds more than one state, the second-to-last entry
     *       is treated as the predecessor: its exploration is counted and its
     *       utility is moved towards
     *       {@code reinforcement(predecessor) + utility(iProblemState)} by the
     *       learn rate alpha. No discount factor is applied.</li>
     * </ol>
     *
     * @param iProblemState the state whose utility serves as the update target
     * @param list          the path of visited states; presumably its last entry
     *                      is {@code iProblemState} (verify against callers)
     * @param iOperator     not used by this implementation
     */
    @Override // kdo.reinforcement.IValueFunctionUpdate
    public void update(IProblemState iProblemState, List<IProblemState> list, IOperator iOperator) {
        if (iProblemState.checkGoalState()) {
            iProblemState.onExploration();
            // Read the count AFTER onExploration() so it includes this visit.
            // NOTE(review): assumes getExplorationCount() returns int - confirm.
            int explorationCount = iProblemState.getExplorationCount();
            // Incremental mean: newAvg = (oldAvg * (n - 1) + sample) / n.
            float averagedUtility = ((iProblemState.getUtility() * (explorationCount - 1))
                    + iProblemState.getReinforcement()) / explorationCount;
            iProblemState.setUtility(averagedUtility);
        }
        if (list.size() > 1) {
            IProblemState previousState = list.get(list.size() - 2);
            previousState.onExploration();
            float alpha = this.learnrateStrategy.getAlpha(previousState.getExplorationCount());
            float oldUtility = previousState.getUtility();
            // TD target uses the (possibly just updated) utility of iProblemState.
            float tdTarget = previousState.getReinforcement() + iProblemState.getUtility();
            previousState.setUtility(oldUtility + (alpha * (tdTarget - oldUtility)));
        }
    }
}
