Agent Base

The base Agent class provides the core interface and functionality that all rLLM agents inherit from.

rllm.agents.agent

Trajectory `dataclass`

Source code in rllm/agents/agent.py

@dataclass
class Trajectory:
    """A named sequence of steps for one task, with an aggregate reward and free-form info."""

    uid: str = field(default_factory=lambda: str(uuid.uuid4()))  # unique id to deduplicate on
    name: str = "agent"
    task: Any = None
    steps: list[Step] = field(default_factory=list)
    reward: float = 0.0
    info: dict = field(default_factory=dict)

    def to_dict(self):
        """Serialize the trajectory into a plain dict, serializing each step via its ``to_dict``."""
        payload = {"uid": self.uid, "name": self.name, "task": self.task}
        payload["steps"] = [item.to_dict() for item in self.steps]
        # The reward is coerced to a built-in float in the serialized form.
        payload["reward"] = float(self.reward)
        payload["info"] = self.info
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "Trajectory":
        """Build a Trajectory from a dict, turning each entry of ``steps`` into a Step.

        ``name``, ``task`` and ``reward`` are required keys (``KeyError`` if absent);
        ``uid`` falls back to a fresh UUID string, ``steps`` to an empty list and
        ``info`` to an empty dict.
        """
        fallback_uid = str(uuid.uuid4())
        uid = data.get("uid", fallback_uid)
        name = data["name"]
        task = data["task"]
        steps = [Step.from_dict(raw_step) for raw_step in data.get("steps", [])]
        reward = data["reward"]
        info = data.get("info", {})
        return cls(uid=uid, name=name, task=task, steps=steps, reward=reward, info=info)

    def is_cumulative(self) -> bool:
        """
        Tell whether each step's chat_completions extends the one before it.

        Returns True when, for every step after the first, the previous step's
        chat_completions is a prefix of the current step's chat_completions.
        Trajectories with zero or one step are trivially cumulative.
        """
        last_seen = None
        for current in self.steps:
            previous, last_seen = last_seen, current
            if previous is None:
                # Nothing to compare the first step against.
                continue
            head = previous.chat_completions
            body = current.chat_completions
            if len(body) < len(head) or not body[: len(head)] == head:
                return False
        return True

from_dict `classmethod`

from_dict(data: dict) -> Trajectory

Create Trajectory from dictionary, properly deserializing Step objects.

Source code in rllm/agents/agent.py

@classmethod
def from_dict(cls, data: dict) -> "Trajectory":
    """Build a Trajectory from a dict, turning each entry of ``steps`` into a Step.

    ``name``, ``task`` and ``reward`` are required keys (``KeyError`` if absent);
    ``uid`` falls back to a fresh UUID string, ``steps`` to an empty list and
    ``info`` to an empty dict.
    """
    fallback_uid = str(uuid.uuid4())
    uid = data.get("uid", fallback_uid)
    name = data["name"]
    task = data["task"]
    steps = [Step.from_dict(raw_step) for raw_step in data.get("steps", [])]
    reward = data["reward"]
    info = data.get("info", {})
    return cls(uid=uid, name=name, task=task, steps=steps, reward=reward, info=info)

is_cumulative

is_cumulative() -> bool

Returns True if for every step after the first, its chat_completions is an exact superset of the previous step's chat_completions (i.e., the previous chat_completions is a prefix).

Source code in rllm/agents/agent.py

def is_cumulative(self) -> bool:
    """
    Tell whether each step's chat_completions extends the one before it.

    Returns True when, for every step after the first, the previous step's
    chat_completions is a prefix of the current step's chat_completions.
    Trajectories with zero or one step are trivially cumulative.
    """
    last_seen = None
    for current in self.steps:
        previous, last_seen = last_seen, current
        if previous is None:
            # Nothing to compare the first step against.
            continue
        head = previous.chat_completions
        body = current.chat_completions
        if len(body) < len(head) or not body[: len(head)] == head:
            return False
    return True

Episode `dataclass`

Source code in rllm/agents/agent.py

@dataclass
class Episode:
    """One rollout: its task, how it terminated, its trajectories, and attached metrics/info."""

    id: str = ""  # rollout id e.g., task_id:rollout_idx
    task: Any = None
    termination_reason: "TerminationReason" = None  # noqa: F821
    is_correct: bool = False
    trajectories: list[Trajectory] = field(default_factory=list)
    metrics: dict = field(default_factory=dict)
    info: dict = field(default_factory=dict)

    def to_dict(self):
        """Serialize the episode into a plain dict.

        The termination reason is stored by its ``.value`` (or None when unset),
        and each trajectory is serialized through its own ``to_dict``.
        """
        if self.termination_reason is not None:
            reason_value = self.termination_reason.value
        else:
            reason_value = None
        payload = {"id": self.id, "task": self.task, "termination_reason": reason_value}
        payload["is_correct"] = bool(self.is_correct)
        payload["trajectories"] = [item.to_dict() for item in self.trajectories]
        payload["metrics"] = self.metrics
        payload["info"] = self.info
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> "Episode":
        """Build an Episode from a dict, turning each entry of ``trajectories`` into a Trajectory.

        ``id``, ``task``, ``is_correct`` and ``trajectories`` are required keys.
        A missing or None ``termination_reason`` becomes ``TerminationReason.UNKNOWN``;
        ``metrics`` and ``info`` default to empty dicts.
        """
        from rllm.engine.agent_workflow_engine import TerminationReason

        episode_id = data["id"]
        task = data["task"]
        if data.get("termination_reason") is not None:
            reason = TerminationReason(data["termination_reason"])
        else:
            reason = TerminationReason.UNKNOWN
        is_correct = data["is_correct"]
        trajectories = [Trajectory.from_dict(raw_trajectory) for raw_trajectory in data["trajectories"]]
        metrics = data.get("metrics", {})
        info = data.get("info", {})
        return cls(
            id=episode_id,
            task=task,
            termination_reason=reason,
            is_correct=is_correct,
            trajectories=trajectories,
            metrics=metrics,
            info=info,
        )

from_dict `classmethod`

from_dict(data: dict) -> Episode

Create Episode from dictionary, properly deserializing Trajectory objects.

Source code in rllm/agents/agent.py

@classmethod
def from_dict(cls, data: dict) -> "Episode":
    """Build an Episode from a dict, turning each entry of ``trajectories`` into a Trajectory.

    ``id``, ``task``, ``is_correct`` and ``trajectories`` are required keys.
    A missing or None ``termination_reason`` becomes ``TerminationReason.UNKNOWN``;
    ``metrics`` and ``info`` default to empty dicts.
    """
    from rllm.engine.agent_workflow_engine import TerminationReason

    episode_id = data["id"]
    task = data["task"]
    if data.get("termination_reason") is not None:
        reason = TerminationReason(data["termination_reason"])
    else:
        reason = TerminationReason.UNKNOWN
    is_correct = data["is_correct"]
    trajectories = [Trajectory.from_dict(raw_trajectory) for raw_trajectory in data["trajectories"]]
    metrics = data.get("metrics", {})
    info = data.get("info", {})
    return cls(
        id=episode_id,
        task=task,
        termination_reason=reason,
        is_correct=is_correct,
        trajectories=trajectories,
        metrics=metrics,
        info=info,
    )

BaseAgent

Bases: ABC

Source code in rllm/agents/agent.py

class BaseAgent(ABC):
    """Abstract base for agents.

    Subclasses must implement ``reset``. ``update_from_env`` and
    ``update_from_model`` raise ``NotImplementedError`` here, and the
    ``chat_completions`` / ``trajectory`` properties return empty defaults.
    """

    @property
    def chat_completions(self) -> list[dict[str, str]]:
        """Converts agent's internal state into a list of OAI chat completions.

        The base implementation returns an empty list.
        """
        return []

    @property
    def trajectory(self) -> Trajectory:
        """Converts agent's internal state into a Trajectory object.

        The base implementation returns a new, empty Trajectory on every access.
        """
        return Trajectory()

    def update_from_env(self, observation: Any, reward: float, done: bool, info: dict, **kwargs):
        """
        Updates the agent's internal state after an environment step.

        Args:
            observation (Any): The observation after stepping through environment.
            reward (float): The reward received after taking the action.
            done (bool): Whether the episode has ended due to termination.
            info (dict): Additional metadata from the environment.
            **kwargs: Additional keyword arguments; unused by this base implementation.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement this method if using AgentExecutionEngine")

    def update_from_model(self, response: str, **kwargs) -> Action:
        """
        Updates the agent's internal state after the model generates a response.

        Args:
            response (str): The response from the model.
            **kwargs: Additional keyword arguments; unused by this base implementation.

        Returns:
            Action: Declared return type for subclass implementations.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement this method if using AgentExecutionEngine")

    @abstractmethod
    def reset(self):
        """
        Resets the agent's internal state, typically called at the beginning of a new episode.

        This function should clear any stored history or state information necessary
        for a fresh interaction.

        Returns:
            None
        """
        return

    def get_current_state(self) -> Step | None:
        """
        Returns the most recent Step of the agent's trajectory.

        This method provides access to the agent's internal state at the current step,
        which can be useful for debugging, logging, or state management.

        Returns:
            Step | None: The last step in ``self.trajectory.steps``, or None when
            the trajectory has no steps.
        """
        # Read the property once: a subclass may rebuild the Trajectory on every
        # access, so checking one object and indexing another would be wasteful
        # and could disagree.
        steps = self.trajectory.steps
        return steps[-1] if steps else None

chat_completions `property`

chat_completions: list[dict[str, str]]

Converts agent's internal state into a list of OAI chat completions.

trajectory `property`

trajectory: Trajectory

Converts agent's internal state into a Trajectory object.

update_from_env

update_from_env(observation: Any, reward: float, done: bool, info: dict, **kwargs)

Updates the agent's internal state after an environment step.

Parameters:

Name	Type	Description	Default
`observation`	`Any`	The observation after stepping through environment.	required
`reward`	`float`	The reward received after taking the action.	required
`done`	`bool`	Whether the episode has ended due to termination.	required
`info`	`dict`	Additional metadata from the environment.	required

Source code in rllm/agents/agent.py

def update_from_env(self, observation: Any, reward: float, done: bool, info: dict, **kwargs):
    """
    Updates the agent's internal state after an environment step.

    Args:
        observation (Any): The observation after stepping through environment.
        reward (float): The reward received after taking the action.
        done (bool): Whether the episode has ended due to termination.
        info (dict): Additional metadata from the environment.
        **kwargs: Additional keyword arguments; unused by this base implementation.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError("Subclasses must implement this method if using AgentExecutionEngine")

update_from_model

update_from_model(response: str, **kwargs) -> Action

Updates the agent's internal state after the model generates a response.

Parameters:

Name	Type	Description	Default
`response`	`str`	The response from the model.	required

Returns:

Type	Description
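`Action`	The `Action` returned by the subclass implementation (the base method always raises `NotImplementedError`).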

Source code in rllm/agents/agent.py

def update_from_model(self, response: str, **kwargs) -> Action:
    """
    Updates the agent's internal state after the model generates a response.

    Args:
        response (str): The response from the model.
        **kwargs: Additional keyword arguments; unused by this base implementation.

    Returns:
        Action: Declared return type for subclass implementations.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError("Subclasses must implement this method if using AgentExecutionEngine")

reset `abstractmethod`

reset()

Resets the agent's internal state, typically called at the beginning of a new episode.

This function should clear any stored history or state information necessary for a fresh interaction.

Returns:

Type	Description
	None

Source code in rllm/agents/agent.py

@abstractmethod
def reset(self):
    """
    Resets the agent's internal state, typically called at the beginning of a new episode.

    This function should clear any stored history or state information necessary
    for a fresh interaction. It is declared abstract; the body shown here only
    returns None.

    Returns:
        None
    """
    return

get_current_state

get_current_state() -> Step | None

Returns the most recent `Step` of the agent's trajectory, or `None` if the trajectory has no steps.

This method provides access to the agent's internal state at the current step, which can be useful for debugging, logging, or state management.

Returns:

Name	Type	Description
`Step`	`Step \| None`	The agent's current state.

Source code in rllm/agents/agent.py

def get_current_state(self) -> Step | None:
    """
    Returns the most recent Step of the agent's trajectory.

    This method provides access to the agent's internal state at the current step,
    which can be useful for debugging, logging, or state management.

    Returns:
        Step | None: The last step in ``self.trajectory.steps``, or None when
        the trajectory has no steps.
    """
    # Read the property once: a subclass may rebuild the Trajectory on every
    # access, so checking one object and indexing another would be wasteful
    # and could disagree.
    steps = self.trajectory.steps
    return steps[-1] if steps else None

Agent Base

rllm.agents.agent

Trajectory dataclass

from_dict classmethod

is_cumulative

Episode dataclass

from_dict classmethod

BaseAgent

chat_completions property

trajectory property

update_from_env

update_from_model

reset abstractmethod

get_current_state

Trajectory `dataclass`

from_dict `classmethod`

Episode `dataclass`

from_dict `classmethod`

chat_completions `property`

trajectory `property`

reset `abstractmethod`