"""
ControlPolicy protocol for unified path selection.
This module defines the ControlPolicy protocol that all path selection
strategies must implement: heuristics, RL policies, and supervised/unsupervised learning policies.
The protocol uses Python's structural typing (Protocol) to allow any class
with the required methods to be used as a policy without explicit inheritance.
Example:
>>> class MyPolicy:
... def select_action(self, request, options, network_state) -> int:
... return 0 # Always select first option
... def update(self, request, action, reward) -> None:
... pass # No learning
... def get_name(self) -> str:
... return "MyPolicy"
>>>
>>> policy = MyPolicy()
>>> isinstance(policy, ControlPolicy) # True
"""
from __future__ import annotations

from typing import TYPE_CHECKING, Protocol, TypeAlias, runtime_checkable

if TYPE_CHECKING:
    from fusion.domain.network_state import NetworkState
    from fusion.domain.request import Request
    from fusion.modules.rl.adapter import PathOption

@runtime_checkable
class ControlPolicy(Protocol):
"""
Protocol for control policies that select actions for resource allocation.
This protocol defines the interface for all path selection strategies
in the FUSION simulation framework. Implementations include:
- Heuristic policies: Rule-based selection (first-fit, shortest-path)
- RL policies: Reinforcement learning agents (PPO, DQN, etc.)
- Supervised/unsupervised policies: Pre-trained neural networks or classifiers
- Composite policies: FallbackPolicy, TiebreakingPolicy
All policies must:
1. Respect feasibility: Only select paths where PathOption.is_feasible is True
2. Return valid indices: Return 0 to len(options)-1, or -1 for no valid action
3. Never mutate state: NetworkState must remain unchanged during select_action
4. Provide a name: Return descriptive name via get_name() for logging
Example:
>>> policy = FirstFeasiblePolicy()
>>> action = policy.select_action(request, options, network_state)
>>> if action >= 0:
... result = orchestrator.apply_action(action, request, options)
... policy.update(request, action, result.reward)
... logger.info(f"Policy {policy.get_name()} selected action {action}")
>>> else:
... # No feasible path - request blocked
... handle_blocking(request)
"""

    def select_action(
        self,
        request: Request,
        options: list[PathOption],
        network_state: NetworkState,
    ) -> int:
"""
Select an action (path index) for the given request.
This method is the core decision-making interface. It receives the
current request, available path options (with feasibility information),
and read-only network state. It returns the index of the selected path.
:param request: The incoming request to serve. Contains source, destination,
bandwidth requirements, and timing information.
:type request: Request
:param options: List of available path options, each with path_index,
path, weight_km, is_feasible, congestion, slots_needed, modulation.
For protected paths, also includes backup_path, backup_feasible.
:type options: list[PathOption]
:param network_state: Current state of the network. This is read-only;
policies must not modify network state.
:type network_state: NetworkState
:return: Path index (0 to len(options)-1) for the selected path,
or -1 if no valid action exists.
:rtype: int
.. note::
- Policies MUST only return indices where options[i].is_feasible is True
- For protected paths, check options[i].both_paths_feasible for full protection
- Returning an infeasible index is undefined behavior (orchestrator may reject)
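
        Example (a minimal first-feasible sketch; it relies only on the
        documented ``is_feasible`` flag and is not part of this framework):
            >>> class FirstFeasible:
            ...     def select_action(self, request, options, network_state) -> int:
            ...         for i, opt in enumerate(options):
            ...             if opt.is_feasible:
            ...                 return i  # first feasible path wins
            ...         return -1  # no feasible path: signal blocking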
"""
...

    def update(self, request: Request, action: int, reward: float) -> None:
        """
        Update the policy based on experience.

        Called after an action is executed and its reward is computed. This
        enables online learning for RL policies. Heuristic and pre-trained
        supervised/unsupervised policies typically implement this as a no-op.

        :param request: The request that was served
        :type request: Request
        :param action: The action (path index) that was taken
        :type action: int
        :param reward: The reward received. Typically positive for a successful
            allocation, negative for blocking.
        :type reward: float

        .. note::
            - Heuristic policies should implement this as ``pass``
            - RL policies may update internal state, replay buffers, etc.
            - Supervised/unsupervised policies (pre-trained) typically implement this as ``pass``
            - This method should not raise exceptions
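
        Example (an illustrative reward tracker, not a real RL update;
        ``AveragingPolicy`` and its attributes are hypothetical):
            >>> class AveragingPolicy:
            ...     def __init__(self):
            ...         self.mean_reward, self.n = 0.0, 0
            ...     def update(self, request, action, reward) -> None:
            ...         self.n += 1
            ...         # Incremental mean, so no replay buffer is needed
            ...         self.mean_reward += (reward - self.mean_reward) / self.n
            >>> p = AveragingPolicy()
            >>> p.update(None, 0, 1.0)
            >>> p.mean_reward
            1.0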
"""
...

    def get_name(self) -> str:
        """
        Return the policy name for logging and metrics.

        This method enables meaningful log messages and metrics tracking.
        Names should be descriptive and include relevant configuration.

        :return: Human-readable policy name (e.g., "FirstFeasiblePolicy",
            "RLPolicy(PPO)", "SupervisedPolicy(pytorch)")
        :rtype: str
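
        Example (the configuration suffix shown is illustrative):
            >>> class MyPolicy:
            ...     def get_name(self) -> str:
            ...         return "MyPolicy(greedy)"
            >>> MyPolicy().get_name()
            'MyPolicy(greedy)'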
"""
...


# Type alias for policy action results
PolicyAction: TypeAlias = int
"""Type alias for a policy action: -1 when no valid action exists, 0 to k-1 for a valid path index."""