/*
 * Decompiled with CFR 0.152.
 */
package aima.test.learningtest;

import aima.learning.reinforcement.PassiveADPAgent;
import aima.learning.reinforcement.PassiveTDAgent;
import aima.learning.reinforcement.QLearningAgent;
import aima.learning.reinforcement.QTable;
import aima.probability.decision.MDP;
import aima.probability.decision.MDPFactory;
import aima.probability.decision.MDPPerception;
import aima.probability.decision.MDPPolicy;
import aima.probability.decision.MDPUtilityFunction;
import aima.probability.decision.cellworld.CellWorldPosition;
import aima.test.probabilitytest.MockRandomizer;
import aima.util.Pair;
import java.util.Hashtable;
import junit.framework.TestCase;

/**
 * JUnit 3 tests for the reinforcement-learning agents ({@link PassiveADPAgent},
 * {@link PassiveTDAgent} and {@link QLearningAgent}), run against the MDP returned by
 * {@link MDPFactory#createFourByThreeMDP()}.
 *
 * <p>All randomness is supplied through {@link MockRandomizer} instances built from fixed
 * value arrays; the expected numbers asserted below are pinned to those arrays and to the
 * trial counts used in each test.
 */
public class ReinforcementLearningTest extends TestCase {
    /** Absolute tolerance used when comparing a learned utility with its expected value. */
    private static final double UTILITY_TOLERANCE = 0.001;

    MDP<CellWorldPosition, String> fourByThree;
    MDPPolicy<CellWorldPosition, String> policy;

    /**
     * Builds a fresh MDP and the fixed policy handed to the passive agents.
     * One action is registered for each of the nine positions listed below.
     */
    public void setUp() {
        this.fourByThree = MDPFactory.createFourByThreeMDP();
        // Explicit type arguments: the original raw `new MDPPolicy()` was an unchecked conversion.
        this.policy = new MDPPolicy<CellWorldPosition, String>();
        this.policy.setAction(new CellWorldPosition(1, 1), "up");
        this.policy.setAction(new CellWorldPosition(1, 2), "left");
        this.policy.setAction(new CellWorldPosition(1, 3), "left");
        this.policy.setAction(new CellWorldPosition(1, 4), "left");
        this.policy.setAction(new CellWorldPosition(2, 1), "up");
        this.policy.setAction(new CellWorldPosition(2, 3), "up");
        this.policy.setAction(new CellWorldPosition(3, 1), "right");
        this.policy.setAction(new CellWorldPosition(3, 2), "right");
        this.policy.setAction(new CellWorldPosition(3, 3), "right");
    }

    /**
     * Runs 100 trials of the passive ADP agent under the fixed policy and checks the
     * resulting utility of nine positions against recorded values.
     */
    public void testPassiveADPAgent() {
        PassiveADPAgent<CellWorldPosition, String> agent =
                new PassiveADPAgent<CellWorldPosition, String>(this.fourByThree, this.policy);
        MockRandomizer randomizer = newTrialRandomizer();
        MDPUtilityFunction<CellWorldPosition> utilities = null;
        for (int trial = 0; trial < 100; ++trial) {
            agent.executeTrial(randomizer);
            // Fetched after every trial, exactly as the original test did.
            utilities = agent.getUtilityFunction();
        }
        assertUtility(0.676, utilities, 1, 1);
        assertUtility(0.626, utilities, 1, 2);
        assertUtility(0.573, utilities, 1, 3);
        assertUtility(0.519, utilities, 1, 4);
        assertUtility(0.746, utilities, 2, 1);
        assertUtility(0.865, utilities, 2, 3);
        assertUtility(0.796, utilities, 3, 1);
        assertUtility(0.906, utilities, 3, 3);
        assertUtility(1.0, utilities, 3, 4);
    }

    /**
     * Runs 200 trials of the passive TD agent under the fixed policy and checks the
     * resulting utility of nine positions against recorded values.
     */
    public void testPassiveTDAgent() {
        PassiveTDAgent<CellWorldPosition, String> agent =
                new PassiveTDAgent<CellWorldPosition, String>(this.fourByThree, this.policy);
        MockRandomizer randomizer = newTrialRandomizer();
        MDPUtilityFunction<CellWorldPosition> utilities = null;
        for (int trial = 0; trial < 200; ++trial) {
            agent.executeTrial(randomizer);
            // Fetched after every trial, exactly as the original test did.
            utilities = agent.getUtilityFunction();
        }
        assertUtility(0.662, utilities, 1, 1);
        assertUtility(0.61, utilities, 1, 2);
        assertUtility(0.553, utilities, 1, 3);
        assertUtility(0.496, utilities, 1, 4);
        assertUtility(0.735, utilities, 2, 1);
        assertUtility(0.835, utilities, 2, 3);
        assertUtility(0.789, utilities, 3, 1);
        assertUtility(0.889, utilities, 3, 3);
        assertUtility(1.0, utilities, 3, 4);
    }

    /**
     * Exploratory run: executes 100 Q-learning trials and prints the resulting Q-table and
     * the policy derived from it. It asserts nothing. The name does not start with
     * {@code test}, so JUnit 3's name-based discovery does not pick it up.
     */
    public void xtestQLearningAgent() {
        QLearningAgent<CellWorldPosition, String> agent =
                new QLearningAgent<CellWorldPosition, String>(this.fourByThree);
        MockRandomizer randomizer = newTrialRandomizer();
        QTable<CellWorldPosition, String> qTable = null;
        for (int trial = 0; trial < 100; ++trial) {
            agent.executeTrial(randomizer);
            qTable = agent.getQTable();
        }
        System.out.println(qTable);
        System.out.println(qTable.getPolicy());
    }

    /**
     * Walks the Q-learning agent through its first step with the randomizer returning 0.7.
     * The test expects the chosen "left" action from (1, 4) to land on (1, 3) with reward
     * -0.04, and the Q-value of the first (state, action) pair to stay 0.0 until the next
     * {@code decideAction} call, after which it must read -0.04.
     */
    public void testFirstStepsOfQLAAgentUnderNormalProbability() {
        QLearningAgent<CellWorldPosition, String> agent =
                new QLearningAgent<CellWorldPosition, String>(this.fourByThree);
        MockRandomizer randomizer = new MockRandomizer(new double[]{0.7});
        CellWorldPosition start = new CellWorldPosition(1, 4);

        String action = agent.decideAction(new MDPPerception<CellWorldPosition>(start, -0.04));
        assertEquals("left", action);
        assertEquals(Double.valueOf(0.0), agent.getQTable().getQValue(start, action));

        agent.execute(action, randomizer);
        assertEquals(new CellWorldPosition(1, 3), agent.getCurrentState());
        assertEquals(Double.valueOf(-0.04), agent.getCurrentReward());
        assertEquals(Double.valueOf(0.0), agent.getQTable().getQValue(start, action));

        // The returned action is irrelevant here; the call is made because the Q-value
        // asserted next only changes once the agent has processed the following perception.
        agent.decideAction(new MDPPerception<CellWorldPosition>(new CellWorldPosition(1, 3), -0.04));
        assertEquals(Double.valueOf(-0.04), agent.getQTable().getQValue(start, action));
    }

    /**
     * Walks the Q-learning agent through its first step with the randomizer returning 0.85.
     * The test expects the chosen "left" action from (1, 4) to land on (2, 4) with reward
     * -1.0, the following {@code decideAction} call to return {@code null}, and the Q-value
     * of the first (state, action) pair to read -1.0 afterwards.
     */
    public void testFirstStepsOfQLAAgentWhenFirstStepTerminates() {
        QLearningAgent<CellWorldPosition, String> agent =
                new QLearningAgent<CellWorldPosition, String>(this.fourByThree);
        CellWorldPosition start = new CellWorldPosition(1, 4);

        String action = agent.decideAction(new MDPPerception<CellWorldPosition>(start, -0.04));
        assertEquals("left", action);

        MockRandomizer randomizer = new MockRandomizer(new double[]{0.85});
        agent.execute(action, randomizer);
        assertEquals(new CellWorldPosition(2, 4), agent.getCurrentState());
        assertEquals(Double.valueOf(-1.0), agent.getCurrentReward());
        assertEquals(Double.valueOf(0.0), agent.getQTable().getQValue(start, action));

        String nextAction =
                agent.decideAction(new MDPPerception<CellWorldPosition>(new CellWorldPosition(2, 4), -1.0));
        assertNull(nextAction);
        assertEquals(Double.valueOf(-1.0), agent.getQTable().getQValue(start, action));
    }

    /**
     * Creates the randomizer shared by the multi-trial tests. A new instance (and a new
     * backing array) is returned on every call so tests never share randomizer state.
     */
    private static MockRandomizer newTrialRandomizer() {
        return new MockRandomizer(new double[]{0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6, 0.5});
    }

    /**
     * Asserts that the utility stored for one position matches {@code expected} within
     * {@link #UTILITY_TOLERANCE}. The failure message names the position, since the stack
     * trace of a failure points at this helper rather than at the individual expectation.
     *
     * @param expected  the expected utility
     * @param utilities the utility function under inspection
     * @param x         first argument passed to the {@link CellWorldPosition} constructor
     * @param y         second argument passed to the {@link CellWorldPosition} constructor
     */
    private static void assertUtility(
            double expected, MDPUtilityFunction<CellWorldPosition> utilities, int x, int y) {
        assertEquals(
                "utility at (" + x + ", " + y + ")",
                expected,
                utilities.getUtility(new CellWorldPosition(x, y)),
                UTILITY_TOLERANCE);
    }
}

