package kdo.reinforcement;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import kdo.domain.IOperator;
import kdo.domain.IProblemState;

/* loaded from: input_file:kdo/reinforcement/ActiveRLAgent.class */
public class ActiveRLAgent extends RLAgent {
    private Map<IProblemState, TransitionModel> model;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:kdo/reinforcement/ActiveRLAgent$TransitionModel.class */
    /**
     * Transition statistics gathered for a single state: for every operator that has been
     * applied, the outcomes observed so far are kept in a {@link TransitionModelAction}.
     */
    public class TransitionModel {
        /** Observed outcomes, keyed by the operator that produced them. */
        private final Map<IOperator, TransitionModelAction> actions = new HashMap<>();

        public TransitionModel() {
        }

        /**
         * Returns the estimated probability that applying the operator leads to the state.
         *
         * @param iOperator the applied operator
         * @param iProblemState the candidate successor state
         * @return the estimate, or 0 when the operator has never been recorded
         */
        public float getProbability(IOperator iOperator, IProblemState iProblemState) {
            TransitionModelAction recorded = this.actions.get(iOperator);
            return recorded == null ? 0.0f : recorded.getProbability(iProblemState);
        }

        /**
         * Records one observation of the operator leading to the given successor state.
         *
         * @param iOperator the applied operator
         * @param iProblemState the state that was reached
         */
        public void update(IOperator iOperator, IProblemState iProblemState) {
            TransitionModelAction recorded = this.actions.get(iOperator);
            if (recorded == null) {
                recorded = new TransitionModelAction();
                this.actions.put(iOperator, recorded);
            }
            recorded.update(iProblemState);
        }

        /**
         * Returns the recorded operator with the highest expected utility.
         *
         * @return the best recorded operator, or {@code null} when nothing has been recorded
         *         (or no recorded operator has a utility above negative infinity)
         */
        public IOperator getBestAction() {
            float bestUtility = Float.NEGATIVE_INFINITY;
            IOperator bestOperator = null;
            for (IOperator candidate : this.actions.keySet()) {
                float utility = getExpectedUtility(candidate);
                // Strict comparison: on ties the operator seen first in map order is kept.
                if (utility > bestUtility) {
                    bestUtility = utility;
                    bestOperator = candidate;
                }
            }
            return bestOperator;
        }

        /**
         * Computes the expected utility of applying the operator: the probability-weighted sum
         * of the enclosing agent's {@code utilityFunction} over all recorded successor states.
         *
         * @param iOperator the operator to evaluate
         * @return the expected utility; for an operator without recorded successors, 3 plus a
         *         random value in [0, 1)
         */
        public float getExpectedUtility(IOperator iOperator) {
            Collection<IProblemState> successorStates = getSuccessorStates(iOperator);
            if (successorStates.isEmpty()) {
                // NOTE(review): presumably an optimistic, randomly tie-broken value that makes
                // untried operators attractive - confirm the intended scale of the constant.
                return 3.0f + ((float) Math.random());
            }
            float expected = 0.0f;
            for (IProblemState successor : successorStates) {
                float stateUtility = ActiveRLAgent.this.utilityFunction(
                        successor.getUtility(), successor.getExplorationCount());
                expected += getProbability(iOperator, successor) * stateUtility;
            }
            return expected;
        }

        /**
         * Returns the successor states recorded for the operator.
         *
         * @param iOperator the operator to look up
         * @return the recorded successors, or an empty collection for an unrecorded operator
         */
        public Collection<IProblemState> getSuccessorStates(IOperator iOperator) {
            TransitionModelAction recorded = this.actions.get(iOperator);
            if (recorded == null) {
                return Collections.emptyList();
            }
            return recorded.getSuccessorStates();
        }

        /**
         * Formats the best operator followed by its statistics as {@code "<operator>: <stats>"}.
         * When nothing has been recorded the result is {@code "null: null"}.
         */
        public String bestActionString() {
            IOperator bestAction = getBestAction();
            StringBuilder sb = new StringBuilder();
            sb.append(bestAction).append(": ");
            sb.append(this.actions.get(bestAction));
            return sb.toString();
        }

        /** Formats every recorded operator and its statistics, one operator per line. */
        public String allActionsString() {
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<IOperator, TransitionModelAction> entry : this.actions.entrySet()) {
                sb.append(entry.getKey());
                sb.append(entry.getValue()).append("\n");
            }
            return sb.toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:kdo/reinforcement/ActiveRLAgent$TransitionModelAction.class */
    /**
     * Outcome statistics for one operator: how many times it was recorded in total and how
     * many times each successor state was observed.
     */
    public class TransitionModelAction {
        /** Total number of recorded observations for this operator. */
        int count = 0;

        /** Number of observations per successor state. */
        Map<IProblemState, Integer> visitCounts = new HashMap<>();

        public TransitionModelAction() {
        }

        /** Returns the total number of recorded observations. */
        public int getCount() {
            return this.count;
        }

        /**
         * Returns the successor states observed so far. The result is the live key view of the
         * internal map, not a copy.
         */
        public Collection<IProblemState> getSuccessorStates() {
            return this.visitCounts.keySet();
        }

        /**
         * Records one more observation that ended in the given successor state.
         *
         * @param iProblemState the state that was reached
         */
        public void update(IProblemState iProblemState) {
            this.count++;
            Integer previous = this.visitCounts.get(iProblemState);
            int visits = previous == null ? 1 : previous.intValue() + 1;
            this.visitCounts.put(iProblemState, Integer.valueOf(visits));
        }

        /**
         * Returns the relative frequency with which the given successor state was observed.
         *
         * @param iProblemState the successor state to look up
         * @return visits of that state divided by the total count, or 0 if it was never observed
         */
        public float getProbability(IProblemState iProblemState) {
            Integer visits = this.visitCounts.get(iProblemState);
            if (visits == null) {
                return 0.0f;
            }
            // Divide as float: int / int would truncate every proper fraction to 0.
            return visits.floatValue() / this.count;
        }

        /** Formats the total count followed by each successor's name and probability. */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append(String.format("(%d): ", Integer.valueOf(getCount())));
            for (IProblemState successor : this.visitCounts.keySet()) {
                sb.append(successor.getName());
                sb.append(String.format(" %5.2f ", Float.valueOf(getProbability(successor))));
            }
            return sb.toString();
        }
    }

    /**
     * Creates an agent with an empty transition model.
     *
     * @param iValueFunctionUpdate passed unchanged to the {@code RLAgent} constructor
     * @param iStateMapper passed unchanged to the {@code RLAgent} constructor
     * @param j passed unchanged to the {@code RLAgent} constructor; its meaning is not
     *          visible here (NOTE(review): possibly a seed or limit - confirm in RLAgent)
     */
    public ActiveRLAgent(IValueFunctionUpdate iValueFunctionUpdate, IStateMapper iStateMapper, long j) {
        super(iValueFunctionUpdate, iStateMapper, j);
        // Typed instance instead of a raw HashMap, avoiding an unchecked assignment.
        this.model = new HashMap<>();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Delegates to the superclass, then records the transition from the second-to-last
     * percept to the new state under the operator that was applied.
     *
     * <p>Nothing is recorded while fewer than two percepts are available.
     * NOTE(review): this relies on {@code super.onNewState} having added the new state to
     * {@code percepts} - confirm in RLAgent.
     *
     * @param iProblemState the state that was just reached
     * @param iOperator the operator recorded as having led to it
     */
    @Override // kdo.reinforcement.RLAgent
    public void onNewState(IProblemState iProblemState, IOperator iOperator) {
        super.onNewState(iProblemState, iOperator);
        int perceptCount = this.percepts.size();
        if (perceptCount <= 1) {
            return;
        }
        IProblemState previousState = this.percepts.get(perceptCount - 2);
        TransitionModel previousModel = this.model.get(previousState);
        if (previousModel == null) {
            previousModel = new TransitionModel();
            this.model.put(previousState, previousModel);
        }
        previousModel.update(iOperator, iProblemState);
    }

    /**
     * Picks the operator of the given state with the highest expected utility according to
     * the transition model of that state. A model is created and stored for the state if
     * none exists yet.
     *
     * <p>The first operator is the default; another one replaces it only when its expected
     * utility is strictly greater than the best seen so far.
     *
     * @param iProblemState the state to decide for
     * @return the selected operator
     * @throws IndexOutOfBoundsException if the state offers no operators
     */
    @Override // kdo.reinforcement.RLAgent
    public IOperator decideOnAction(IProblemState iProblemState) {
        TransitionModel stateModel = this.model.get(iProblemState);
        if (stateModel == null) {
            stateModel = new TransitionModel();
            this.model.put(iProblemState, stateModel);
        }

        List<IOperator> candidates = iProblemState.getOperators();
        IOperator chosen = candidates.get(0);
        float bestUtility = Float.NEGATIVE_INFINITY;
        for (IOperator candidate : candidates) {
            float utility = stateModel.getExpectedUtility(candidate);
            // Negated strict comparison so that NaN never replaces the current choice.
            if (!(utility > bestUtility)) {
                continue;
            }
            bestUtility = utility;
            chosen = candidate;
        }
        return chosen;
    }

    /**
     * Returns the strategy summary followed by the full model dump, each under its own
     * heading.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Strategy\n");
        text.append(toStrategyString());
        text.append("\nModel\n");
        text.append(toModelString());
        return text.toString();
    }

    /**
     * Builds one line per modelled state in the form
     * {@code "<state> do: <best action string>"}.
     */
    public String toStrategyString() {
        StringBuilder text = new StringBuilder();
        for (Map.Entry<IProblemState, TransitionModel> stateEntry : this.model.entrySet()) {
            TransitionModel stateModel = stateEntry.getValue();
            text.append(stateEntry.getKey())
                    .append(" do: ")
                    .append(stateModel.bestActionString())
                    .append("\n");
        }
        return text.toString();
    }

    /**
     * Builds one entry per modelled state in the form
     * {@code "<state> do: <all actions string>"}, each followed by a newline.
     */
    public String toModelString() {
        StringBuilder text = new StringBuilder();
        for (IProblemState state : this.model.keySet()) {
            String actionsText = getAllActionsString(state);
            text.append(state).append(" do: ").append(actionsText).append("\n");
        }
        return text.toString();
    }

    /**
     * Returns the formatted statistics of every operator recorded for the given state.
     *
     * @param iProblemState the state to describe
     * @return the formatted statistics, or an empty string when the state has no model
     */
    public String getAllActionsString(IProblemState iProblemState) {
        TransitionModel stateModel = this.model.get(iProblemState);
        if (stateModel == null) {
            // A state without a model has no recorded actions; avoid a NullPointerException.
            return "";
        }
        return stateModel.allActionsString();
    }
}
