Code Tools

rllm.tools.code_tools

E2BPythonInterpreter

Bases: CodeTool

A tool for executing Python code in a sandboxed environment.

Source code in rllm/tools/code_tools/e2b_tool.py

class E2BPythonInterpreter(CodeTool):
    """A tool for executing Python code in a sandboxed environment.

    A pool of ``n_sandboxes`` sandboxes is created at construction time.
    ``forward`` picks one sandbox per call, either round-robin or pinned
    through the ``id`` keyword argument.
    """

    def __init__(self, n_sandboxes=1, api_key=E2B_API_KEY):
        """Create the sandbox pool and register the tool.

        Args:
            n_sandboxes: Number of sandboxes to create; must be greater than 0.
            api_key: API key handed to every ``Sandbox`` that is created.

        Raises:
            ImportError: If ``Sandbox`` is ``None`` (the e2b package is missing).
            AssertionError: If ``n_sandboxes`` is not greater than 0.
        """
        if Sandbox is None:
            raise ImportError("e2b_code_interpreter is not installed. Please install it with `pip install e2b-code-interpreter`.")
        assert n_sandboxes > 0, "Number of sandboxes must be greater than 0"
        self.n_sandboxes = n_sandboxes
        self.api_key = api_key
        self._init_sandbox()
        super().__init__(name="e2b_python", description="A tool that executes python code in a sandbox and returns standard output/error.")

    def _init_sandbox(self):
        """Initialize multiple sandbox environments."""
        self.cur_sandbox_idx = 0
        self.sandboxes = [Sandbox(api_key=self.api_key, timeout=3600) for _ in range(self.n_sandboxes)]

    def _kill_sandbox(self):
        """Clean up all sandbox resources (best effort; failures are printed)."""
        for sandbox in self.sandboxes:
            try:
                sandbox.kill()
            except Exception as e:
                print(f"Error killing sandbox: {e}")
        self.sandboxes = []

    def _restart_sandbox(self, id: int = 0) -> Any:
        """Replace the sandbox at index ``id`` with a fresh one and return it.

        Killing the old sandbox is best effort, mirroring ``_kill_sandbox``:
        a sandbox that is already dead must not prevent its replacement,
        otherwise the broken instance would stay in the pool.
        """
        previous_sandbox = self.sandboxes[id]
        try:
            previous_sandbox.kill()
        except Exception as e:
            print(f"Error killing sandbox: {e}")
        sandbox = Sandbox(api_key=self.api_key, timeout=3600)
        self.sandboxes[id] = sandbox
        return sandbox

    def forward(self, code: str, timeout: int = 20, **kwargs) -> CodeToolOutput:
        """
        Execute Python code in one of the sandboxes using round-robin distribution.

        Args:
            code: Python code to execute
            timeout: Maximum execution time in seconds
            **kwargs: Additional parameters including id, max_retries

        Returns:
            CodeToolOutput containing execution results, stdout, and stderr.
            If every attempt raises, the sandbox is restarted and an output
            carrying only an error message is returned.
        """
        sandbox_id = kwargs.get("id", None)
        max_retries = kwargs.get("max_retries", 3)

        # Compare against None: sandbox index 0 is a valid pin and must not
        # fall through to the round-robin branch.
        if sandbox_id is not None:
            self.cur_sandbox_idx = sandbox_id % self.n_sandboxes
        else:
            # Round-robin distribution
            self.cur_sandbox_idx = (self.cur_sandbox_idx + 1) % self.n_sandboxes
        sandbox = self.sandboxes[self.cur_sandbox_idx]

        # Always make at least one attempt so `execution` is never unbound,
        # even when the caller passes max_retries <= 0.
        execution = None
        for _ in range(max(1, max_retries)):
            try:
                execution = sandbox.run_code(code, timeout=timeout)
                break
            except Exception:
                continue

        if execution is None:
            self._restart_sandbox(self.cur_sandbox_idx)
            return CodeToolOutput(name=self.name or "e2b_python", error="Sandbox error, please try again.")

        # Code may produce zero, one or several results / stdout chunks;
        # merge whatever is present instead of asserting exactly one.
        result_texts = [item.text for item in (execution.results or []) if item.text]
        result = "\n".join(result_texts) if result_texts else None

        stdout_chunks = (execution.logs.stdout or []) if execution.logs else []
        stdout = "".join(stdout_chunks)

        stderr = None
        if execution.error:
            stderr = f"{execution.error.traceback}"

        return CodeToolOutput(name=self.name or "e2b_python", stdout=stdout or None, stderr=stderr or None, output=result or None)

    @property
    def json(self) -> dict[str, Any]:
        """Return the function-calling schema describing this tool."""
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        "code": {
                            "type": "string",
                            "description": "Python code to execute in a E2B sandbox environment.",
                        }
                    },
                    "required": ["code"],
                },
            },
        }

forward

forward(code: str, timeout: int = 20, **kwargs) -> CodeToolOutput

Execute Python code in one of the sandboxes using round-robin distribution.

Parameters:

Name	Type	Description	Default
`code`	`str`	Python code to execute	required
`timeout`	`int`	Maximum execution time in seconds	`20`
`**kwargs`		Additional parameters including id, max_retries	`{}`

Returns:

Type	Description
`CodeToolOutput`	CodeToolOutput containing execution results, stdout, and stderr

Source code in rllm/tools/code_tools/e2b_tool.py

def forward(self, code: str, timeout: int = 20, **kwargs) -> CodeToolOutput:
    """
    Execute Python code in one of the sandboxes using round-robin distribution.

    Args:
        code: Python code to execute
        timeout: Maximum execution time in seconds
        **kwargs: Additional parameters including id, max_retries

    Returns:
        CodeToolOutput containing execution results, stdout, and stderr.
        If every attempt raises, the sandbox is restarted and an output
        carrying only an error message is returned.
    """
    sandbox_id = kwargs.get("id", None)
    max_retries = kwargs.get("max_retries", 3)

    # Compare against None: sandbox index 0 is a valid pin and must not
    # fall through to the round-robin branch.
    if sandbox_id is not None:
        self.cur_sandbox_idx = sandbox_id % self.n_sandboxes
    else:
        # Round-robin distribution
        self.cur_sandbox_idx = (self.cur_sandbox_idx + 1) % self.n_sandboxes
    sandbox = self.sandboxes[self.cur_sandbox_idx]

    # Always make at least one attempt so `execution` is never unbound,
    # even when the caller passes max_retries <= 0.
    execution = None
    for _ in range(max(1, max_retries)):
        try:
            execution = sandbox.run_code(code, timeout=timeout)
            break
        except Exception:
            continue

    if execution is None:
        self._restart_sandbox(self.cur_sandbox_idx)
        return CodeToolOutput(name=self.name or "e2b_python", error="Sandbox error, please try again.")

    # Code may produce zero, one or several results / stdout chunks;
    # merge whatever is present instead of asserting exactly one.
    result_texts = [item.text for item in (execution.results or []) if item.text]
    result = "\n".join(result_texts) if result_texts else None

    stdout_chunks = (execution.logs.stdout or []) if execution.logs else []
    stdout = "".join(stdout_chunks)

    stderr = None
    if execution.error:
        stderr = f"{execution.error.traceback}"

    return CodeToolOutput(name=self.name or "e2b_python", stdout=stdout or None, stderr=stderr or None, output=result or None)

LCBPythonInterpreter

Bases: CodeTool

A tool for executing Python code in a sandboxed environment.

This tool provides a safe way to execute Python code with timeout protection and isolation from the main process, using the LiveCodeBench execution environment.

Source code in rllm/tools/code_tools/lcb_tool.py

class LCBPythonInterpreter(CodeTool):
    """
    Python execution tool backed by the LiveCodeBench sandbox helper.

    Code is handed to ``lcb_sandbox`` together with a timeout; whatever it
    returns (or raises) is wrapped in a ``CodeToolOutput``.
    """

    def __init__(self):
        """Register the tool under the name ``python``."""
        tool_description = "Execute python code in the same environment as the LiveCodeBench benchmark."
        # NOTE(review): n_sandboxes=-1 presumably tells the base class that no
        # sandbox pool is managed here; confirm against CodeTool.
        super().__init__(name="python", description=tool_description, n_sandboxes=-1)

    def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:
        """
        Run ``code`` through ``lcb_sandbox`` and package the outcome.

        Args:
            code (str): Python code to execute
            timeout (int): Value forwarded to ``lcb_sandbox`` as ``timeout``, defaults to 12
            **kwargs: Accepted for interface compatibility; not read here

        Returns:
            CodeToolOutput: stdout, stderr and result on success; on any
            exception, an output whose ``error`` names the exception type and message
        """
        try:
            captured_out, captured_err, value = lcb_sandbox(code, timeout=timeout)
            outcome = CodeToolOutput(name=self.name or "python", stdout=captured_out, stderr=captured_err, output=value)
        except Exception as exc:
            failure_kind = type(exc).__name__
            outcome = CodeToolOutput(
                name=self.name or "python",
                error=f"Sandbox Error: {failure_kind} - {exc!s}",
            )
        return outcome

`__init__`

__init__()

Initialize the Python interpreter tool with appropriate settings.

Source code in rllm/tools/code_tools/lcb_tool.py

def __init__(self):
    """Register the tool under the name ``python``."""
    tool_description = "Execute python code in the same environment as the LiveCodeBench benchmark."
    # NOTE(review): n_sandboxes=-1 presumably tells the base class that no
    # sandbox pool is managed here; confirm against CodeTool.
    super().__init__(name="python", description=tool_description, n_sandboxes=-1)

forward

forward(code: str, timeout: int = 12, **kwargs) -> CodeToolOutput

Execute Python code using the LiveCodeBench sandbox environment.

Parameters:

Name	Type	Description	Default
`code`	`str`	Python code to execute	required
`timeout`	`int`	Maximum execution time in seconds, defaults to 12	`12`
`**kwargs`		Additional parameters (unused but kept for compatibility)	`{}`

Returns:

Name	Type	Description
`CodeToolOutput`	`CodeToolOutput`	Contains execution results with stdout, stderr, and result fields

Source code in rllm/tools/code_tools/lcb_tool.py

def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:
    """
    Run ``code`` through ``lcb_sandbox`` and package the outcome.

    Args:
        code (str): Python code to execute
        timeout (int): Value forwarded to ``lcb_sandbox`` as ``timeout``, defaults to 12
        **kwargs: Accepted for interface compatibility; not read here

    Returns:
        CodeToolOutput: stdout, stderr and result on success; on any
        exception, an output whose ``error`` names the exception type and message
    """
    try:
        captured_out, captured_err, value = lcb_sandbox(code, timeout=timeout)
        outcome = CodeToolOutput(name=self.name or "python", stdout=captured_out, stderr=captured_err, output=value)
    except Exception as exc:
        failure_kind = type(exc).__name__
        outcome = CodeToolOutput(
            name=self.name or "python",
            error=f"Sandbox Error: {failure_kind} - {exc!s}",
        )
    return outcome

PythonInterpreter

Bases: CodeTool

A unified Python interpreter tool that supports multiple backends.

This class provides a common interface for executing Python code using different backend implementations, including local execution, E2B sandbox, Together API, and LiveCodeBench environment.

Source code in rllm/tools/code_tools/python_interpreter.py

class PythonInterpreter(CodeTool):
    """
    A unified Python interpreter tool that supports multiple backends.

    This class provides a common interface for executing Python code using different
    backend implementations: the LiveCodeBench environment ("local"), the E2B
    sandbox ("e2b") and the Together API ("together").
    """

    def __init__(self, backend: BackendType = "local", n_sandboxes: int = 1, api_key: str | None = None, name: str = "python", description: str = "Execute Python code in a sandboxed environment. Returns results and standard output/error."):
        """
        Initialize the unified Python interpreter with the specified backend.

        Args:
            backend: The backend to use ("local", "e2b", or "together")
            n_sandboxes: Number of concurrent sandboxes/workers to use (for applicable backends)
            api_key: API key for cloud-based backends (e2b, together); when
                None, the backend's own default key is used
            name: The name of the tool
            description: Description of what the tool does

        Raises:
            ValueError: If ``backend`` is not one of the supported values.
        """
        self.backend_type = backend
        self.n_sandboxes = n_sandboxes
        self.api_key = api_key

        # Initialize the appropriate backend
        self._init_backend()

        super().__init__(name=name, description=description, n_sandboxes=n_sandboxes)

    def _init_backend(self):
        """Initialize the selected backend interpreter.

        Raises:
            ValueError: If ``self.backend_type`` is not "local", "e2b" or "together".
        """
        # Forward api_key only when one was supplied; passing an explicit None
        # would override the default key each backend declares for itself.
        key_kwargs = {} if self.api_key is None else {"api_key": self.api_key}

        if self.backend_type == "local":
            self.backend: LCBPythonInterpreter | E2BPythonInterpreter | TogetherCodeTool = LCBPythonInterpreter()
        elif self.backend_type == "e2b":
            self.backend = E2BPythonInterpreter(n_sandboxes=self.n_sandboxes, **key_kwargs)
        elif self.backend_type == "together":
            self.backend = TogetherCodeTool(**key_kwargs)
        else:
            raise ValueError(f"Unsupported backend type: {self.backend_type}")

    def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:
        """
        Execute Python code using the selected backend.

        Args:
            code: Python code to execute
            timeout: Maximum execution time in seconds
            **kwargs: Additional parameters specific to the backend implementation

        Returns:
            CodeToolOutput containing execution results, stdout, and stderr
        """
        return self.backend.forward(code=code, timeout=timeout, **kwargs)

    def _init_sandbox(self):
        """Initialize the sandbox environment, if the backend supports it."""
        # hasattr(self, "backend") guards against being called before
        # _init_backend has assigned the attribute.
        if hasattr(self, "backend") and hasattr(self.backend, "_init_sandbox"):
            self.backend._init_sandbox()

    def _kill_sandbox(self):
        """Clean up all sandbox resources, if the backend supports it."""
        if hasattr(self, "backend") and hasattr(self.backend, "_kill_sandbox"):
            self.backend._kill_sandbox()

    def _restart_sandbox(self):
        """Restart the sandbox environment.

        Delegates to the backend's own ``_restart_sandbox`` when it has one;
        otherwise kills the sandbox and rebuilds the backend from scratch.
        """
        if hasattr(self, "backend") and hasattr(self.backend, "_restart_sandbox"):
            self.backend._restart_sandbox()
        else:
            self._kill_sandbox()
            self._init_backend()

    @property
    def json(self) -> dict[str, Any]:
        """Return the tool's information in the required format."""
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": self.description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        "code": {
                            "type": "string",
                            "description": "Execute Python code in a sandboxed environment. Returns results and standard output/error.",
                        },
                        "timeout": {
                            "type": "integer",
                            "description": "Maximum execution time in seconds before timing out",
                            "default": 12,
                        },
                    },
                    "required": ["code"],
                },
            },
        }

json `property`

json: dict[str, Any]

Return the tool's information in the required format.

`__init__`

__init__(backend: BackendType = 'local', n_sandboxes: int = 1, api_key: str | None = None, name: str = 'python', description: str = 'Execute Python code in a sandboxed environment. Returns results and standard output/error.')

Initialize the unified Python interpreter with the specified backend.

Parameters:

Name	Type	Description	Default
`backend`	`BackendType`	The backend to use ("local", "e2b", or "together"); any other value raises `ValueError`	`'local'`
`n_sandboxes`	`int`	Number of concurrent sandboxes/workers to use (for applicable backends)	`1`
`api_key`	`str \| None`	API key for cloud-based backends (e2b, together)	`None`
`name`	`str`	The name of the tool	`'python'`
`description`	`str`	Description of what the tool does	`'Execute Python code in a sandboxed environment. Returns results and standard output/error.'`

Source code in rllm/tools/code_tools/python_interpreter.py

def __init__(self, backend: BackendType = "local", n_sandboxes: int = 1, api_key: str | None = None, name: str = "python", description: str = "Execute Python code in a sandboxed environment. Returns results and standard output/error."):
    """
    Set up the interpreter: store the configuration, build the backend, then
    initialize the base tool.

    Args:
        backend: Backend selector; the accepted values are decided by ``_init_backend``
        n_sandboxes: Number of concurrent sandboxes/workers to use (for applicable backends)
        api_key: API key for cloud-based backends
        name: The name of the tool
        description: Description of what the tool does
    """
    # Store the configuration first: _init_backend reads these attributes.
    self.backend_type, self.n_sandboxes, self.api_key = backend, n_sandboxes, api_key
    self._init_backend()

    super().__init__(name=name, description=description, n_sandboxes=n_sandboxes)

forward

forward(code: str, timeout: int = 12, **kwargs) -> CodeToolOutput

Execute Python code using the selected backend.

Parameters:

Name	Type	Description	Default
`code`	`str`	Python code to execute	required
`timeout`	`int`	Maximum execution time in seconds	`12`
`**kwargs`		Additional parameters specific to the backend implementation	`{}`

Returns:

Type	Description
`CodeToolOutput`	CodeToolOutput containing execution results, stdout, and stderr

Source code in rllm/tools/code_tools/python_interpreter.py

def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:
    """
    Delegate execution of ``code`` to the configured backend.

    Args:
        code: Python code to execute
        timeout: Maximum execution time in seconds
        **kwargs: Extra keyword arguments passed through to the backend unchanged

    Returns:
        Whatever the backend's ``forward`` returns (a CodeToolOutput)
    """
    delegate = self.backend.forward
    call_args = {"code": code, "timeout": timeout, **kwargs}
    return delegate(**call_args)

TogetherCodeTool

Bases: CodeTool

Tool for executing Python code using Together Code Interpreter (TCI).

This tool integrates with Together's Code Interpreter API to provide a secure sandbox environment for executing Python code.

Source code in rllm/tools/code_tools/together_tool.py

class TogetherCodeTool(CodeTool):
    """Tool for executing Python code using Together Code Interpreter (TCI).

    This tool integrates with Together's Code Interpreter API to provide
    a secure sandbox environment for executing Python code.
    """

    def __init__(
        self,
        api_key: str | None = TOGETHER_API_KEY,
    ):
        """Initialize the TogetherCodeTool.

        The tool name and description are fixed ("together_python").

        Args:
            api_key: Together API key handed to the ``Together`` client

        Raises:
            ImportError: If the ``together`` package is not installed.
        """
        self.api_key = api_key
        self.client = self._setup_client()
        # Session reported by the most recent run; None until a run succeeds.
        self.session_id: str | None = None
        super().__init__(name="together_python", description="Execute Python code using Together Code Interpreter.")

    def _setup_client(self):
        """Set up the Together client for interacting with the API."""
        try:
            from together import Together

            return Together(api_key=self.api_key)
        except ImportError:
            raise ImportError("The 'together' package is required to use TogetherCodeTool. Install it with 'pip install together'.") from None

    def forward(self, code: str, timeout: int = 12, session_id: str | None = None, **kwargs) -> CodeToolOutput:
        """
        Execute Python code using Together Code Interpreter.

        Args:
            code: Python code to execute
            timeout: Maximum execution time in seconds; assigned to the client's ``timeout`` attribute
            session_id: Optional session ID to maintain state between runs; when
                given it is sent with the request. The session reported by each
                run is stored on ``self.session_id`` so callers can pass it back.
            **kwargs: Additional parameters to pass to Together's code_interpreter.run

        Returns:
            CodeToolOutput containing execution results, stdout, and stderr.
            Any exception raised while running or parsing is reported through
            the ``error`` and ``stderr`` fields instead of propagating.
        """
        self.client.timeout = timeout
        try:
            # A caller-supplied session must actually reach the API, otherwise
            # every run starts from a fresh session and no state is kept.
            if session_id:
                self.session_id = session_id
                kwargs["session_id"] = session_id
            # Execute the code
            response = self.client.code_interpreter.run(code=code, language="python", **kwargs)

            # Save the session_id for potential future use
            if hasattr(response.data, "session_id"):
                self.session_id = response.data.session_id

            # Process the outputs, grouping them by stream
            stdout_parts: list[str] = []
            stderr_parts: list[str] = []
            output_parts: list[str] = []
            error = None

            for output_item in response.data.outputs:
                if output_item.type == "stdout":
                    stdout_parts.append(output_item.data)
                elif output_item.type == "stderr":
                    stderr_parts.append(output_item.data)
                elif output_item.type == "error":
                    # Errors are surfaced both as `error` and as part of stderr.
                    error = output_item.data
                    stderr_parts.append(output_item.data)
                else:
                    output_parts.append(str(output_item.data))

            # Return formatted output; a stream with no items is reported as None
            return CodeToolOutput(
                name=self.name or "together_python",
                output="\n".join(output_parts).strip() if output_parts else None,
                stdout="\n".join(stdout_parts).strip() if stdout_parts else None,
                stderr="\n".join(stderr_parts).strip() if stderr_parts else None,
                error=error,
            )

        except Exception as e:
            return CodeToolOutput(name=self.name or "together_python", error=f"{type(e).__name__} - {str(e)}", stderr=str(e))

    def _init_sandbox(self):
        """Initialize a new sandbox session by resetting the session ID."""
        self.session_id = None

    def _kill_sandbox(self):
        """Clean up sandbox resources by forgetting the stored session ID."""
        self.session_id = None

    def _restart_sandbox(self):
        """Restart the sandbox by forgetting the stored session ID."""
        self.session_id = None

`__init__`

__init__(api_key: str | None = TOGETHER_API_KEY)

Initialize the TogetherCodeTool.

Parameters:

Name	Type	Description	Default
`api_key`	`str \| None`	Together API key (optional, will use environment variables if not provided)	`TOGETHER_API_KEY`
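`name`		Not a parameter of `__init__`; the tool name is fixed to `together_python`	n/a
`description`		Not a parameter of `__init__`; the description is fixed to "Execute Python code using Together Code Interpreter."	n/a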

Source code in rllm/tools/code_tools/together_tool.py

def __init__(self, api_key: str | None = TOGETHER_API_KEY):
    """Build the Together client and register the tool as "together_python".

    Args:
        api_key: Together API key handed to the client built by ``_setup_client``
    """
    self.api_key = api_key
    # No session is known until a run reports one.
    self.session_id: str | None = None
    together_client = self._setup_client()
    self.client = together_client
    super().__init__(name="together_python", description="Execute Python code using Together Code Interpreter.")

forward

forward(code: str, timeout: int = 12, session_id: str | None = None, **kwargs) -> CodeToolOutput

Execute Python code using Together Code Interpreter.

Parameters:

Name	Type	Description	Default
`code`	`str`	Python code to execute	required
`timeout`	`int`	Maximum execution time in seconds; assigned to the client's `timeout` attribute before the request is made	`12`
`session_id`	`str \| None`	Optional session ID to maintain state between runs	`None`
`**kwargs`		Additional parameters to pass to Together's code_interpreter.run	`{}`

Returns:

Type	Description
`CodeToolOutput`	CodeToolOutput containing execution results, stdout, and stderr

Source code in rllm/tools/code_tools/together_tool.py

def forward(self, code: str, timeout: int = 12, session_id: str | None = None, **kwargs) -> CodeToolOutput:
    """
    Execute Python code using Together Code Interpreter.

    Args:
        code: Python code to execute
        timeout: Maximum execution time in seconds; assigned to the client's ``timeout`` attribute
        session_id: Optional session ID to maintain state between runs; when
            given it is sent with the request. The session reported by each
            run is stored on ``self.session_id`` so callers can pass it back.
        **kwargs: Additional parameters to pass to Together's code_interpreter.run

    Returns:
        CodeToolOutput containing execution results, stdout, and stderr.
        Any exception raised while running or parsing is reported through
        the ``error`` and ``stderr`` fields instead of propagating.
    """
    self.client.timeout = timeout
    try:
        # A caller-supplied session must actually reach the API, otherwise
        # every run starts from a fresh session and no state is kept.
        if session_id:
            self.session_id = session_id
            kwargs["session_id"] = session_id
        # Execute the code
        response = self.client.code_interpreter.run(code=code, language="python", **kwargs)

        # Save the session_id for potential future use
        if hasattr(response.data, "session_id"):
            self.session_id = response.data.session_id

        # Process the outputs, grouping them by stream
        stdout_parts: list[str] = []
        stderr_parts: list[str] = []
        output_parts: list[str] = []
        error = None

        for output_item in response.data.outputs:
            if output_item.type == "stdout":
                stdout_parts.append(output_item.data)
            elif output_item.type == "stderr":
                stderr_parts.append(output_item.data)
            elif output_item.type == "error":
                # Errors are surfaced both as `error` and as part of stderr.
                error = output_item.data
                stderr_parts.append(output_item.data)
            else:
                output_parts.append(str(output_item.data))

        # Return formatted output; a stream with no items is reported as None
        return CodeToolOutput(
            name=self.name or "together_python",
            output="\n".join(output_parts).strip() if output_parts else None,
            stdout="\n".join(stdout_parts).strip() if stdout_parts else None,
            stderr="\n".join(stderr_parts).strip() if stderr_parts else None,
            error=error,
        )

    except Exception as e:
        return CodeToolOutput(name=self.name or "together_python", error=f"{type(e).__name__} - {str(e)}", stderr=str(e))

Code Tools

rllm.tools.code_tools

E2BPythonInterpreter

forward

LCBPythonInterpreter

__init__

forward

PythonInterpreter

json property

__init__

forward

TogetherCodeTool

__init__

forward

init

json `property`

init

init