package kdo.reinforcement;

import java.util.List;
import kdo.domain.IOperator;
import kdo.domain.IProblemState;

/* loaded from: input_file:kdo/reinforcement/TDQLearning.class */
/**
 * Value-function update that writes temporal-difference style corrections
 * into a table of (state, operator) utilities.
 *
 * <p>The table is not supplied by either constructor; it has to be injected
 * through {@link #setTableAccess(IQTableAccess)} before {@link #update} is
 * called, otherwise {@code update} dereferences a {@code null} table.
 */
public class TDQLearning extends TemporalDifference implements IValueFunctionUpdate {
    /** Table holding utilities and exploration counts per (state, operator) pair. */
    private IQTableAccess qTable;

    /** Creates an instance using whatever defaults the superclass provides. */
    public TDQLearning() {
    }

    /**
     * Creates an instance that hands the given learn-rate strategy to the superclass.
     *
     * @param iLearnrateStrategy strategy later queried for the step size (alpha)
     */
    public TDQLearning(ILearnrateStrategy iLearnrateStrategy) {
        super(iLearnrateStrategy);
    }

    /**
     * Sets the table this updater reads from and writes to.
     *
     * @param iQTableAccess table access used by every subsequent {@link #update} call
     */
    public void setTableAccess(IQTableAccess iQTableAccess) {
        this.qTable = iQTableAccess;
    }

    /**
     * Applies up to two table updates for one step.
     *
     * <ol>
     *   <li>If {@code iProblemState} reports itself as a goal state, its own
     *       reinforcement is averaged into the utility of its best operator.</li>
     *   <li>If {@code list} holds at least two entries, the utility of
     *       ({@code second-to-last entry}, {@code iOperator}) is moved toward
     *       that entry's reinforcement plus the best utility of
     *       {@code iProblemState}.</li>
     * </ol>
     *
     * @param iProblemState state whose best utility is used as the update target
     * @param list          states seen so far; only its size and its
     *                      second-to-last element are read
     * @param iOperator     operator whose utility is updated on the
     *                      second-to-last state (presumably the operator that
     *                      led to {@code iProblemState} - confirm against callers)
     */
    @Override // kdo.reinforcement.IValueFunctionUpdate
    public void update(IProblemState iProblemState, List<IProblemState> list, IOperator iOperator) {
        if (iProblemState.checkGoalState()) {
            averageGoalReinforcement(iProblemState);
        }

        // A predecessor only exists once the list holds at least two entries.
        if (list.size() <= 1) {
            return;
        }
        IProblemState predecessor = list.get(list.size() - 2);
        shiftTowardsTarget(predecessor, iOperator, iProblemState);
    }

    /**
     * Folds the goal state's reinforcement into the utility of its best
     * operator as a count-weighted running average.
     */
    private void averageGoalReinforcement(IProblemState goal) {
        float reward = goal.getReinforcement();
        IOperator best = qTable.getBestAction(goal);
        float storedUtility = qTable.getUtility(goal, best);
        float visits = qTable.getExplorationCount(goal, best);

        // (old mean * n + new sample) / (n + 1)
        qTable.update(goal, best, ((storedUtility * visits) + reward) / (visits + 1.0f));
    }

    /**
     * Moves the utility of (predecessor, action) a step of size alpha toward
     * the predecessor's reinforcement plus the successor's best utility.
     * No discount factor is applied to the successor's best utility.
     */
    private void shiftTowardsTarget(IProblemState predecessor, IOperator action, IProblemState successor) {
        // Step size is derived from the exploration count of this (state, operator) pair.
        float stepSize = learnrateStrategy.getAlpha(qTable.getExplorationCount(predecessor, action));
        float oldEstimate = qTable.getUtility(predecessor, action);
        float reward = predecessor.getReinforcement();

        qTable.update(
                predecessor,
                action,
                oldEstimate + (stepSize * ((reward + qTable.getBestUtility(successor)) - oldEstimate)));
    }
}
