Skip to content

Agent Base

The base Agent class provides the core interface and functionality that all rLLM agents inherit from.

rllm.agents.agent

Trajectory dataclass

Source code in rllm/agents/agent.py
@dataclass
class Trajectory:
    """A named sequence of steps for one task, together with its reward.

    Instances can be converted to plain dictionaries with ``to_dict`` and
    rebuilt from them with ``from_dict``.
    """

    uid: str = field(default_factory=lambda: str(uuid.uuid4()))  # unique id to deduplicate on
    name: str = "agent"
    task: Any = None
    steps: list[Step] = field(default_factory=list)
    reward: float = 0.0
    info: dict = field(default_factory=dict)

    def to_dict(self):
        """Return this trajectory as a plain dictionary.

        Each step is converted through its own ``to_dict`` and the reward is
        coerced with ``float``; ``task`` and ``info`` are passed through as-is.
        """
        payload = {"uid": self.uid, "name": self.name, "task": self.task}
        payload["steps"] = [step.to_dict() for step in self.steps]
        payload["reward"] = float(self.reward)
        payload["info"] = self.info
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "Trajectory":
        """Build a Trajectory from a dictionary, deserializing each step via ``Step.from_dict``.

        Args:
            data: Mapping that must contain "name", "task" and "reward".
                "uid" falls back to a new UUID4 string, "steps" to an empty
                list and "info" to an empty dict when absent.

        Raises:
            KeyError: If "name", "task" or "reward" is missing.
        """
        fields = {
            "uid": data.get("uid", str(uuid.uuid4())),
            "name": data["name"],
            "task": data["task"],
            "steps": [Step.from_dict(raw_step) for raw_step in data.get("steps", [])],
            "reward": data["reward"],
            "info": data.get("info", {}),
        }
        return cls(**fields)

    def is_cumulative(self) -> bool:
        """
        Tell whether every step's chat_completions extends the previous step's.

        Returns True when, for each consecutive pair of steps, the earlier
        chat_completions is a prefix of the later one. Trajectories with
        fewer than two steps are cumulative by definition.
        """
        for earlier, later in zip(self.steps, self.steps[1:]):
            before = earlier.chat_completions
            after = later.chat_completions
            shared = len(before)
            # Not a prefix if the later history is shorter or diverges anywhere in the shared span.
            if len(after) < shared or after[:shared] != before:
                return False
        return True

from_dict classmethod

from_dict(data: dict) -> Trajectory

Create Trajectory from dictionary, properly deserializing Step objects.

Source code in rllm/agents/agent.py
@classmethod
def from_dict(cls, data: dict) -> "Trajectory":
    """Build a Trajectory from a dictionary, deserializing each step via ``Step.from_dict``.

    Args:
        data: Mapping that must contain "name", "task" and "reward".
            "uid" falls back to a new UUID4 string, "steps" to an empty
            list and "info" to an empty dict when absent.

    Raises:
        KeyError: If "name", "task" or "reward" is missing.
    """
    fields = {
        "uid": data.get("uid", str(uuid.uuid4())),
        "name": data["name"],
        "task": data["task"],
        "steps": [Step.from_dict(raw_step) for raw_step in data.get("steps", [])],
        "reward": data["reward"],
        "info": data.get("info", {}),
    }
    return cls(**fields)

is_cumulative

is_cumulative() -> bool

Returns True if, for every step after the first, its chat_completions extends the previous step's chat_completions (i.e., the previous chat_completions is an exact prefix of the current one).

Source code in rllm/agents/agent.py
def is_cumulative(self) -> bool:
    """
    Tell whether every step's chat_completions extends the previous step's.

    Returns True when, for each consecutive pair of steps, the earlier
    chat_completions is a prefix of the later one. Trajectories with
    fewer than two steps are cumulative by definition.
    """
    for earlier, later in zip(self.steps, self.steps[1:]):
        before = earlier.chat_completions
        after = later.chat_completions
        shared = len(before)
        # Not a prefix if the later history is shorter or diverges anywhere in the shared span.
        if len(after) < shared or after[:shared] != before:
            return False
    return True

Episode dataclass

Source code in rllm/agents/agent.py
@dataclass
class Episode:
    """One rollout: its task, outcome flags, trajectories and bookkeeping dicts.

    Instances can be converted to plain dictionaries with ``to_dict`` and
    rebuilt from them with ``from_dict``.
    """

    id: str = ""  # rollout id e.g., task_id:rollout_idx
    task: Any = None
    termination_reason: "TerminationReason" = None  # noqa: F821
    is_correct: bool = False
    trajectories: list[Trajectory] = field(default_factory=list)
    metrics: dict = field(default_factory=dict)
    info: dict = field(default_factory=dict)

    def to_dict(self):
        """Return this episode as a plain dictionary.

        The termination reason is stored as its ``.value`` (or None when
        unset), ``is_correct`` is coerced with ``bool`` and every trajectory
        is converted through its own ``to_dict``.
        """
        reason = self.termination_reason
        payload = {"id": self.id, "task": self.task}
        payload["termination_reason"] = None if reason is None else reason.value
        payload["is_correct"] = bool(self.is_correct)
        payload["trajectories"] = [trajectory.to_dict() for trajectory in self.trajectories]
        payload["metrics"] = self.metrics
        payload["info"] = self.info
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "Episode":
        """Build an Episode from a dictionary, deserializing each trajectory via ``Trajectory.from_dict``.

        Args:
            data: Mapping that must contain "id", "task", "is_correct" and
                "trajectories". A missing or None "termination_reason"
                becomes ``TerminationReason.UNKNOWN``; "metrics" and "info"
                fall back to empty dicts.

        Raises:
            KeyError: If a required key is missing.
        """
        # Imported at call time rather than module level — presumably to avoid a circular import; confirm.
        from rllm.engine.agent_workflow_engine import TerminationReason

        episode_id = data["id"]
        task = data["task"]
        raw_reason = data.get("termination_reason")
        if raw_reason is None:
            reason = TerminationReason.UNKNOWN
        else:
            reason = TerminationReason(raw_reason)
        is_correct = data["is_correct"]
        trajectories = [Trajectory.from_dict(raw_trajectory) for raw_trajectory in data["trajectories"]]
        return cls(
            id=episode_id,
            task=task,
            termination_reason=reason,
            is_correct=is_correct,
            trajectories=trajectories,
            metrics=data.get("metrics", {}),
            info=data.get("info", {}),
        )

from_dict classmethod

from_dict(data: dict) -> Episode

Create Episode from dictionary, properly deserializing Trajectory objects.

Source code in rllm/agents/agent.py
@classmethod
def from_dict(cls, data: dict) -> "Episode":
    """Build an Episode from a dictionary, deserializing each trajectory via ``Trajectory.from_dict``.

    Args:
        data: Mapping that must contain "id", "task", "is_correct" and
            "trajectories". A missing or None "termination_reason"
            becomes ``TerminationReason.UNKNOWN``; "metrics" and "info"
            fall back to empty dicts.

    Raises:
        KeyError: If a required key is missing.
    """
    # Imported at call time rather than module level — presumably to avoid a circular import; confirm.
    from rllm.engine.agent_workflow_engine import TerminationReason

    episode_id = data["id"]
    task = data["task"]
    raw_reason = data.get("termination_reason")
    if raw_reason is None:
        reason = TerminationReason.UNKNOWN
    else:
        reason = TerminationReason(raw_reason)
    is_correct = data["is_correct"]
    trajectories = [Trajectory.from_dict(raw_trajectory) for raw_trajectory in data["trajectories"]]
    return cls(
        id=episode_id,
        task=task,
        termination_reason=reason,
        is_correct=is_correct,
        trajectories=trajectories,
        metrics=data.get("metrics", {}),
        info=data.get("info", {}),
    )

BaseAgent

Bases: ABC

Source code in rllm/agents/agent.py
class BaseAgent(ABC):
    """Abstract base for agents.

    Subclasses must implement ``reset``. ``update_from_env`` and
    ``update_from_model`` raise ``NotImplementedError`` here and, per their
    error message, must be overridden when the agent is used with
    AgentExecutionEngine. The ``chat_completions`` and ``trajectory``
    properties return empty defaults.
    """

    @property
    def chat_completions(self) -> list[dict[str, str]]:
        """Converts agent's internal state into a list of OAI chat completions.

        The base implementation returns an empty list.
        """
        return []

    @property
    def trajectory(self) -> Trajectory:
        """Converts agent's internal state into a Trajectory object.

        The base implementation returns a new, empty Trajectory on every access.
        """
        return Trajectory()

    def update_from_env(self, observation: Any, reward: float, done: bool, info: dict, **kwargs):
        """
        Updates the agent's internal state after an environment step.

        Args:
            observation (Any): The observation after stepping through environment.
            reward (float): The reward received after taking the action.
            done (bool): Whether the episode has ended due to termination.
            info (dict): Additional metadata from the environment.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement this method if using AgentExecutionEngine")

    def update_from_model(self, response: str, **kwargs) -> Action:
        """
        Updates the agent's internal state after the model generates a response.

        Args:
            response (str): The response from the model.

        Returns:
            Action: The value a subclass implementation returns; this base
            implementation never returns.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement this method if using AgentExecutionEngine")

    @abstractmethod
    def reset(self):
        """
        Resets the agent's internal state, typically called at the beginning of a new episode.

        This function should clear any stored history or state information necessary
        for a fresh interaction.

        Returns:
            None
        """
        return None

    def get_current_state(self) -> Step | None:
        """
        Returns the most recent step of the agent's trajectory.

        This method provides access to the agent's internal state at the current step,
        which can be useful for debugging, logging, or state management.

        Returns:
            Step | None: The last step in ``self.trajectory.steps``, or None
            when the trajectory has no steps.
        """
        # Read the property once: it may rebuild the trajectory on each access, and
        # the emptiness check and the indexing must look at the same step list.
        steps = self.trajectory.steps
        return steps[-1] if steps else None

chat_completions property

chat_completions: list[dict[str, str]]

Converts agent's internal state into a list of OAI chat completions.

trajectory property

trajectory: Trajectory

Converts agent's internal state into a Trajectory object.

update_from_env

update_from_env(observation: Any, reward: float, done: bool, info: dict, **kwargs)

Updates the agent's internal state after an environment step.

Parameters:

Name Type Description Default
observation Any

The observation after stepping through environment.

required
reward float

The reward received after taking the action.

required
done bool

Whether the episode has ended due to termination.

required
info dict

Additional metadata from the environment.

required
Source code in rllm/agents/agent.py
def update_from_env(self, observation: Any, reward: float, done: bool, info: dict, **kwargs):
    """
    Hook for folding the result of an environment step into the agent's state.

    The base implementation does nothing but raise; subclasses provide the logic.

    Args:
        observation (Any): The observation after stepping through environment.
        reward (float): The reward received after taking the action.
        done (bool): Whether the episode has ended due to termination.
        info (dict): Additional metadata from the environment.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    message = "Subclasses must implement this method if using AgentExecutionEngine"
    raise NotImplementedError(message)

update_from_model

update_from_model(response: str, **kwargs) -> Action

Updates the agent's internal state after the model generates a response.

Parameters:

Name Type Description Default
response str

The response from the model.

required

Returns:

Type Description
Action

The action returned by a subclass implementation; the base method always raises NotImplementedError.

Source code in rllm/agents/agent.py
def update_from_model(self, response: str, **kwargs) -> Action:
    """
    Hook for folding a model response into the agent's state.

    The base implementation does nothing but raise; subclasses provide the logic.

    Args:
        response (str): The response from the model.

    Returns:
        Action: The value a subclass implementation returns; this base
        implementation never returns.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    message = "Subclasses must implement this method if using AgentExecutionEngine"
    raise NotImplementedError(message)

reset abstractmethod

reset()

Resets the agent's internal state, typically called at the beginning of a new episode.

This function should clear any stored history or state information necessary for a fresh interaction.

Returns:

Type Description

None

Source code in rllm/agents/agent.py
@abstractmethod
def reset(self):
    """
    Clear the agent's internal state so a new episode starts fresh.

    Implementations should drop any stored history or other state carried
    over from a previous interaction. The base body is a no-op.

    Returns:
        None
    """
    return None

get_current_state

get_current_state() -> Step | None

Returns the agent's current state: the most recent Step of its trajectory, or None if the trajectory has no steps.

This method provides access to the agent's internal state at the current step, which can be useful for debugging, logging, or state management.

Returns:

Name Type Description
Step Step | None

The agent's current state.

Source code in rllm/agents/agent.py
def get_current_state(self) -> Step | None:
    """
    Returns the most recent step of the agent's trajectory.

    This method provides access to the agent's internal state at the current step,
    which can be useful for debugging, logging, or state management.

    Returns:
        Step | None: The last step in ``self.trajectory.steps``, or None
        when the trajectory has no steps.
    """
    # Read the property once: it may rebuild the trajectory on each access, and
    # the emptiness check and the indexing must look at the same step list.
    steps = self.trajectory.steps
    return steps[-1] if steps else None