/*
 * Decompiled with CFR 0.152.
 */
package aima.learning.reinforcement;

import aima.learning.reinforcement.MDPAgent;
import aima.probability.decision.MDP;
import aima.probability.decision.MDPPerception;
import aima.probability.decision.MDPPolicy;
import aima.probability.decision.MDPUtilityFunction;
import aima.util.FrequencyCounter;

/**
 * Passive temporal-difference (TD) learning agent.
 *
 * <p>The agent does not choose its own behaviour: every action comes from the
 * fixed {@link MDPPolicy} supplied at construction. While following that
 * policy it keeps a per-state utility estimate and, on each call to
 * {@link #decideAction(MDPPerception)} that has a remembered previous state,
 * nudges the estimate of that previous state using the reward remembered for
 * it and the current estimate of the state reached.
 *
 * <p>NOTE(review): {@code currentState}, {@code currentReward},
 * {@code previousState}, {@code previousAction} and {@code mdp} are inherited
 * from {@link MDPAgent}. This class never assigns {@code currentState} or
 * {@code currentReward}; presumably the superclass fills them in from the
 * perception before calling {@code decideAction} - confirm against
 * {@code MDPAgent}.
 *
 * @param <STATE_TYPE>  type used to identify states
 * @param <ACTION_TYPE> type used to identify actions
 */
public class PassiveTDAgent<STATE_TYPE, ACTION_TYPE>
        extends MDPAgent<STATE_TYPE, ACTION_TYPE> {

    /** Discount factor handed to the update rule on every step. */
    private static final double DISCOUNT_FACTOR = 1.0;

    /** Fixed policy the agent follows; never modified by this class. */
    private final MDPPolicy<STATE_TYPE, ACTION_TYPE> policy;

    /** Current utility estimates; replaced by an updated copy on each update. */
    private MDPUtilityFunction<STATE_TYPE> utilityFunction;

    /** Counts how often each state has been recorded as the previous state. */
    private final FrequencyCounter<STATE_TYPE> stateCount;

    /** Reward remembered for {@code previousState}; {@code null} after a terminal state. */
    private Double previousReward;

    /**
     * Creates an agent that follows {@code mDPPolicy}.
     *
     * @param mDP       model whose {@code emptyMdp()} result is passed to the
     *                  superclass constructor
     * @param mDPPolicy policy consulted for the action in every non-terminal state
     */
    public PassiveTDAgent(MDP<STATE_TYPE, ACTION_TYPE> mDP, MDPPolicy<STATE_TYPE, ACTION_TYPE> mDPPolicy) {
        super(mDP.emptyMdp());
        this.policy = mDPPolicy;
        // Explicit type arguments instead of the raw types emitted by the
        // decompiler, so no unchecked conversion is needed.
        this.utilityFunction = new MDPUtilityFunction<STATE_TYPE>();
        this.stateCount = new FrequencyCounter<STATE_TYPE>();
    }

    /**
     * Processes one perception and returns the action to take next.
     *
     * <ol>
     *   <li>If no utility is recorded yet for the perceived state, its utility
     *       is initialised to the perceived reward and the same reward is
     *       stored in the agent's MDP.</li>
     *   <li>If a previous state is remembered, its count is incremented and
     *       its utility estimate is updated.</li>
     *   <li>If the MDP reports {@code currentState} as terminal, the remembered
     *       state, action and reward are cleared; otherwise they are set to the
     *       current state, the policy's action for it, and the current
     *       reward.</li>
     * </ol>
     *
     * @param mDPPerception perception supplying the perceived state and reward
     * @return the policy's action for the current state, or {@code null} when
     *         the current state is terminal
     */
    @Override
    public ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> mDPPerception) {
        // First sighting of this state: seed its utility with the observed reward.
        if (!this.utilityFunction.hasUtilityFor(mDPPerception.getState())) {
            this.utilityFunction.setUtility(mDPPerception.getState(), mDPPerception.getReward());
            this.mdp.setReward(mDPPerception.getState(), mDPPerception.getReward());
        }

        // Only update when there is a transition to learn from.
        if (this.previousState != null) {
            this.stateCount.incrementFor(this.previousState);
            this.utilityFunction = this.updateUtilityFunction(DISCOUNT_FACTOR);
        }

        if (this.mdp.isTerminalState(this.currentState)) {
            // Forget the transition so the next call does not update across
            // the terminal state.
            this.previousState = null;
            this.previousAction = null;
            this.previousReward = null;
        } else {
            this.previousState = this.currentState;
            this.previousAction = this.policy.getAction(this.currentState);
            this.previousReward = this.currentReward;
        }

        // Cast retained from the decompiled source; it is redundant if the
        // inherited field is already declared as ACTION_TYPE.
        return (ACTION_TYPE)this.previousAction;
    }

    /**
     * Returns a copy of the current utility function in which the estimate for
     * {@code previousState} has been adjusted:
     *
     * <pre>
     *   U(prev) + weight * (previousReward + gamma * U(current) - U(prev))
     * </pre>
     *
     * where {@code weight} is {@code stateCount.probabilityOf(previousState)}.
     * The current utility function object itself is not modified.
     *
     * <p>Must only be called while {@code previousState} and
     * {@code previousReward} are non-null.
     *
     * @param gamma factor applied to the utility of the current state
     * @return the updated copy
     */
    private MDPUtilityFunction<STATE_TYPE> updateUtilityFunction(double gamma) {
        // Declared with STATE_TYPE: the decompiler's MDPUtilityFunction<Object>
        // is not assignable from copy() nor to the declared return type.
        MDPUtilityFunction<STATE_TYPE> updated = this.utilityFunction.copy();

        double previousUtility = this.utilityFunction.getUtility(this.previousState);
        double discountedDifference =
                gamma * this.utilityFunction.getUtility(this.currentState) - previousUtility;
        // presumably probabilityOf is the relative frequency of previousState
        // among all counted states - TODO confirm against FrequencyCounter.
        double adjustment = this.stateCount.probabilityOf(this.previousState)
                * (this.previousReward + discountedDifference);

        updated.setUtility(this.previousState, previousUtility + adjustment);
        return updated;
    }

    /**
     * Returns the agent's current utility estimates.
     *
     * @return the utility function object currently held by the agent (not a copy)
     */
    public MDPUtilityFunction<STATE_TYPE> getUtilityFunction() {
        return this.utilityFunction;
    }
}

