Source code for mesa_llm.reasoning.cot

from typing import TYPE_CHECKING

from mesa_llm.reasoning.reasoning import Observation, Plan, Reasoning

if TYPE_CHECKING:
    from mesa_llm.llm_agent import LLMAgent



[docs]
class CoTReasoning(Reasoning):
    """
    Chain of Thought reasoning with explicit step-by-step analysis before action execution. Uses structured numbered thoughts followed by tool execution. Integrates memory context for informed decision-making.

    Attributes:
        - **agent** (LLMAgent reference)

    Methods:
        - **plan(obs, ttl=1, prompt=None, selected_tools=None, tool_calls="auto")** → *Plan* - Generate synchronous plan with CoT reasoning
        - **async aplan(obs, ttl=1, prompt=None, selected_tools=None, tool_calls="auto")** → *Plan* - Generate asynchronous plan with CoT reasoning

    Reasoning Format:
        Thought 1: [Initial reasoning based on observation]
        Thought 2: [How memory informs the situation]
        Thought 3: [Possible alternatives or risks]
        Thought 4: [Final decision and justification]
        Action: [The action you decide to take]
    """

    def __init__(self, agent: "LLMAgent"):
        super().__init__(agent=agent)


[docs]
    def get_cot_system_prompt(self, obs: Observation) -> str:
        memory = getattr(self.agent, "memory", None)
        agent_persona = getattr(self.agent, "system_prompt", None)
        persona_section = ""
        if isinstance(agent_persona, str) and agent_persona.strip():
            persona_section = (
                "\n        ---\n\n"
                "        # Agent Persona\n"
                f"        {agent_persona.strip()}\n"
            )
        long_term_memory = ""
        if (
            memory
            and hasattr(memory, "format_long_term")
            and callable(memory.format_long_term)
        ):
            long_term_memory = memory.format_long_term()

        short_term_memory = ""
        if (
            memory
            and hasattr(memory, "format_short_term")
            and callable(memory.format_short_term)
        ):
            short_term_memory = memory.format_short_term()

        obs_str = str(obs)

        system_prompt = f"""
        You are an autonomous agent operating in a simulation.
        Use a detailed step-by-step reasoning process (Chain-of-Thought) to decide your next action.
        Your memory contains information from past experiences, and your observation provides the current context.
{persona_section}

        ---

        # Long-Term Memory
        {long_term_memory}

        ---

        # Short-Term Memory (Recent History)
        {short_term_memory}

        ---

        # Current Observation
        {obs_str}

        ---

        # Instructions
        First think through the situation step-by-step, and explain it in the format given below.
        ------------------------------------------------------
        Thought 1: [Initial reasoning based on the observation]
        Thought 2: [How memory informs the situation]
        Thought 3: [Possible alternatives or risks]
        Thought 4: [Final decision and justification]
        Action: [The action you decide to take]
        ------------------------------------------------------
        Keep the reasoning grounded in the current context and relevant history.


        """
        return system_prompt



[docs]
    def plan(
        self,
        prompt: str | None = None,
        obs: Observation | None = None,
        ttl: int = 1,
        selected_tools: list[str] | None = None,
        tool_calls: str | None = "auto",
    ) -> Plan:
        """
        Plan the next (CoT) action based on the current observation and the
        agent's memory.

        ``selected_tools`` is forwarded to ``ToolManager.get_all_tools_schema()``.
        Omitting it or passing ``None`` uses the default behavior of exposing
        all tools, ``[]`` exposes no tools, and a non-empty list restricts
        planning/execution to the named tools.

        ``tool_calls`` controls the execution-phase LiteLLM ``tool_choice``.
        The reasoning pass still keeps tool use disabled with ``"none"``.

        Supported values in Mesa-LLM are:
        - ``None``: defer to LiteLLM/provider default behavior. In practice,
          this usually means no tool calls when no tools are provided and
          behavior similar to ``"auto"`` when tools are available.
        - ``"none"``: never return tool calls; return a normal assistant
          message instead.
        - ``"auto"``: allow the model to either return a normal assistant
          message or call one or more tools.
        - ``"required"``: require the model to call one or more tools.
        """
        # If no prompt is provided, use the agent's default step prompt
        if prompt is None:
            if self.agent.step_prompt is not None:
                prompt = self.agent.step_prompt
            else:
                raise ValueError("No prompt provided and agent.step_prompt is None.")

        if obs is None:
            obs = self.agent.generate_obs()

        llm = self.agent.llm
        system_prompt = self.get_cot_system_prompt(obs)

        rsp = llm.generate(
            prompt=prompt,
            tool_schema=self.agent.tool_manager.get_all_tools_schema(selected_tools),
            tool_choice="none",
            system_prompt=system_prompt,
        )

        chaining_message = rsp.choices[0].message.content
        self.agent.memory.add_to_memory(
            type="plan", content={"content": chaining_message}
        )

        # Pass plan content to agent for display
        if hasattr(self.agent, "_step_display_data"):
            self.agent._step_display_data["plan_content"] = chaining_message
        cot_plan = self.execute_tool_call(
            chaining_message,
            selected_tools=selected_tools,
            ttl=ttl,
            tool_calls=tool_calls,
        )

        return cot_plan



[docs]
    async def aplan(
        self,
        prompt: str | None = None,
        obs: Observation | None = None,
        ttl: int = 1,
        selected_tools: list[str] | None = None,
        tool_calls: str | None = "auto",
    ) -> Plan:
        """
        Asynchronous version of plan() method for parallel planning.

        ``selected_tools`` follows the same contract as ``plan()``: omitting
        it or passing ``None`` uses the default behavior of exposing all
        tools, ``[]`` exposes no tools, and a non-empty list restricts
        planning/execution to the named tools.

        ``tool_calls`` controls the execution-phase LiteLLM ``tool_choice``.
        The reasoning pass still keeps tool use disabled with ``"none"``.

        Supported values in Mesa-LLM are:
        - ``None``: defer to LiteLLM/provider default behavior. In practice,
          this usually means no tool calls when no tools are provided and
          behavior similar to ``"auto"`` when tools are available.
        - ``"none"``: never return tool calls; return a normal assistant
          message instead.
        - ``"auto"``: allow the model to either return a normal assistant
          message or call one or more tools.
        - ``"required"``: require the model to call one or more tools.
        """
        # If no prompt is provided, use the agent's default step prompt
        if prompt is None:
            if self.agent.step_prompt is not None:
                prompt = self.agent.step_prompt
            else:
                raise ValueError("No prompt provided and agent.step_prompt is None.")

        if obs is None:
            obs = await self.agent.agenerate_obs()

        llm = self.agent.llm
        system_prompt = self.get_cot_system_prompt(obs)

        rsp = await llm.agenerate(
            prompt=prompt,
            tool_schema=self.agent.tool_manager.get_all_tools_schema(selected_tools),
            tool_choice="none",
            system_prompt=system_prompt,
        )

        chaining_message = rsp.choices[0].message.content
        await self.agent.memory.aadd_to_memory(
            type="plan", content={"content": chaining_message}
        )

        # Pass plan content to agent for display
        if hasattr(self.agent, "_step_display_data"):
            self.agent._step_display_data["plan_content"] = chaining_message
        cot_plan = await self.aexecute_tool_call(
            chaining_message,
            selected_tools=selected_tools,
            ttl=ttl,
            tool_calls=tool_calls,
        )

        return cot_plan