/*
 * Decompiled with CFR 0.152.
 */
package aima.core.learning.reinforcement.agent;

import aima.core.agent.Action;
import aima.core.learning.reinforcement.PerceptStateReward;
import aima.core.learning.reinforcement.agent.ReinforcementAgent;
import aima.core.probability.mdp.ActionsFunction;
import aima.core.probability.mdp.PolicyEvaluation;
import aima.core.probability.mdp.RewardFunction;
import aima.core.probability.mdp.TransitionProbabilityFunction;
import aima.core.probability.mdp.impl.MDP;
import aima.core.util.FrequencyCounter;
import aima.core.util.datastructure.Pair;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * A passive reinforcement-learning agent that follows a fixed policy and
 * learns a model of its environment from the percepts it receives.
 *
 * <p>The agent maintains:
 * <ul>
 * <li>{@code P} - estimated transition probabilities, derived from the
 * frequency counts {@code Nsa} and {@code NsDelta_sa};</li>
 * <li>{@code R} - the reward recorded the first time each state is seen;</li>
 * <li>{@code U} - utility estimates, recomputed on every percept by the
 * supplied {@link PolicyEvaluation}.</li>
 * </ul>
 * The learned {@code P} and {@code R} are exposed to the policy evaluator
 * through an internal {@link MDP} whose transition and reward functions read
 * those tables live.
 *
 * @param <S> the state type.
 * @param <A> the action type.
 */
public class PassiveADPAgent<S, A extends Action>
extends ReinforcementAgent<S, A> {
    /** The fixed policy being evaluated (state -> action). */
    private final Map<S, A> pi = new HashMap<S, A>();
    /** MDP view backed by the learned tables {@code P} and {@code R}. */
    private final MDP<S, A> mdp;
    /** Learned transition model, keyed by (s', (s, a)). */
    private final Map<Pair<S, Pair<S, A>>, Double> P = new HashMap<Pair<S, Pair<S, A>>, Double>();
    /** Observed reward per state. */
    private final Map<S, Double> R = new HashMap<S, Double>();
    /** Strategy used to recompute {@code U} under the fixed policy. */
    private final PolicyEvaluation<S, A> policyEvaluation;
    /** Current utility estimates; replaced by each policy evaluation. */
    private Map<S, Double> U = new HashMap<S, Double>();
    /** How often each (state, action) pair has been taken. */
    private final FrequencyCounter<Pair<S, A>> Nsa = new FrequencyCounter<Pair<S, A>>();
    /** How often each outcome s' followed each (state, action) pair. */
    private final FrequencyCounter<Pair<S, Pair<S, A>>> NsDelta_sa = new FrequencyCounter<Pair<S, Pair<S, A>>>();
    /** Previous state, or {@code null} at the start of a trial. */
    private S s = null;
    /** Previous action, or {@code null} at the start of a trial. */
    private A a = null;

    /**
     * Creates an agent that evaluates the given fixed policy.
     *
     * @param fixedPolicy      the policy to follow; its entries are copied.
     * @param states           the states of the underlying MDP.
     * @param initialState     the initial state of the underlying MDP.
     * @param actionsFunction  supplies the actions available in each state.
     * @param policyEvaluation computes utilities for the policy against the
     *                         learned model.
     */
    public PassiveADPAgent(Map<S, A> fixedPolicy, Set<S> states, S initialState, ActionsFunction<S, A> actionsFunction, PolicyEvaluation<S, A> policyEvaluation) {
        this.pi.putAll(fixedPolicy);
        this.mdp = new MDP<S, A>(states, initialState, actionsFunction, new TransitionProbabilityFunction<S, A>(){

            @Override
            public double probability(S sDelta, S s, A a) {
                Double p = PassiveADPAgent.this.P.get(new Pair<S, Pair<S, A>>(sDelta, new Pair<S, A>(s, a)));
                // A transition that has never been observed has probability 0.
                return null == p ? 0.0 : p;
            }
        }, new RewardFunction<S>(){

            @Override
            public double reward(S s) {
                Double r = PassiveADPAgent.this.R.get(s);
                // Mirror the transition model: a state whose reward has not
                // been observed yet yields 0 rather than failing on
                // unboxing a null.
                return null == r ? 0.0 : r;
            }
        });
        this.policyEvaluation = policyEvaluation;
    }

    /**
     * Processes one percept: records the reward of a newly seen state,
     * updates the transition model from the previous (state, action) pair,
     * re-evaluates the utilities and selects the next action from the fixed
     * policy.
     *
     * @param percept the current state and the reward received in it.
     * @return the policy's action for the current state, or {@code null} if
     *         the current state is terminal (has no available actions).
     */
    @Override
    public A execute(PerceptStateReward<S> percept) {
        S sDelta = percept.state();
        double rDelta = percept.reward();
        // First visit to s': seed both its utility and its reward.
        if (!this.U.containsKey(sDelta)) {
            this.U.put(sDelta, rDelta);
            this.R.put(sDelta, rDelta);
        }
        // Only update the model when there is a preceding (s, a) pair.
        if (null != this.s) {
            Pair<S, A> sa = new Pair<S, A>(this.s, this.a);
            this.Nsa.incrementFor(sa);
            this.NsDelta_sa.incrementFor(new Pair<S, Pair<S, A>>(sDelta, sa));
            // The denominator is the same for every outcome t of (s, a).
            double saCount = this.Nsa.getCount(sa).doubleValue();
            for (S t : this.mdp.states()) {
                Pair<S, Pair<S, A>> t_sa = new Pair<S, Pair<S, A>>(t, sa);
                if (0 != this.NsDelta_sa.getCount(t_sa)) {
                    // P(t | s, a) = N(t | s, a) / N(s, a)
                    this.P.put(t_sa, this.NsDelta_sa.getCount(t_sa).doubleValue() / saCount);
                }
            }
        }
        this.U = this.policyEvaluation.evaluate(this.pi, this.U, this.mdp);
        if (this.isTerminal(sDelta)) {
            // End of trial: forget the previous state/action.
            this.s = null;
            this.a = null;
        } else {
            this.s = sDelta;
            this.a = this.pi.get(sDelta);
        }
        return this.a;
    }

    /**
     * Returns the current utility estimates.
     *
     * @return an unmodifiable view of the utility map held at the time of
     *         the call.
     */
    @Override
    public Map<S, Double> getUtility() {
        return Collections.unmodifiableMap(this.U);
    }

    /**
     * Discards everything learned so far (transition model, rewards,
     * utilities and frequency counts) and clears the previous state and
     * action. The fixed policy is kept.
     */
    @Override
    public void reset() {
        this.P.clear();
        this.R.clear();
        this.U = new HashMap<S, Double>();
        this.Nsa.clear();
        this.NsDelta_sa.clear();
        this.s = null;
        this.a = null;
    }

    /**
     * Tells whether a state is terminal, i.e. has no available actions in
     * the underlying MDP.
     *
     * @param s the state to check.
     * @return {@code true} if no action is available in {@code s}.
     */
    private boolean isTerminal(S s) {
        return 0 == this.mdp.actions(s).size();
    }
}

