import copy
from typing import TYPE_CHECKING
from mesa_llm.reasoning.reasoning import (
Observation,
Plan,
Reasoning,
)
if TYPE_CHECKING:
from mesa_llm.llm_agent import LLMAgent
[docs]
class ReWOOReasoning(Reasoning):
"""
Reasoning Without Observation for multi-step planning without environmental feedback. Enables multi-step planning without requiring immediate environmental feedback. Plans remain valid across multiple simulation steps with extended TTL. Reduces computational overhead through strategic long-term thinking.
Attributes:
- **agent** (LLMAgent reference)
- **remaining_tool_calls** (int) - Number of tool calls remaining in current plan
- **current_plan** (Plan) - Currently active multi-step plan
- **current_obs** (Observation) - Last observation used for planning
Methods:
- **plan(prompt, obs=None, ttl=1, selected_tools=None, tool_calls="auto")** → *Plan* - Generate synchronous plan with ReWOO reasoning
- **async aplan(prompt, obs=None, ttl=1, selected_tools=None, tool_calls="auto")** → *Plan* - Generate asynchronous plan with ReWOO reasoning
"""
def __init__(self, agent: "LLMAgent"):
super().__init__(agent=agent)
self.remaining_tool_calls = 0 # Initialize remaining tool calls
self.current_plan: Plan | None = None
self.current_obs: Observation | None = None
[docs]
def get_rewoo_system_prompt(self, obs: Observation) -> str:
memory = getattr(self.agent, "memory", None)
agent_persona = getattr(self.agent, "system_prompt", None)
persona_section = ""
if isinstance(agent_persona, str) and agent_persona.strip():
persona_section = (
"\n ---\n\n"
" # Agent Persona\n"
f" {agent_persona.strip()}\n"
)
long_term_memory = ""
if (
memory
and hasattr(memory, "format_long_term")
and callable(memory.format_long_term)
):
long_term_memory = memory.format_long_term()
short_term_memory = ""
if (
memory
and hasattr(memory, "format_short_term")
and callable(memory.format_short_term)
):
short_term_memory = memory.format_short_term()
system_prompt = f"""
You are an autonomous agent that creates multi-step plans without re-observing during execution.
Using the ReWOO (Reasoning WithOut Observation) approach, you will create a comprehensive plan
that anticipates multiple steps ahead based on your current observation and memory.
{persona_section}
---
# Long-Term Memory
{long_term_memory}
---
# Short-Term Memory (Recent History)
{short_term_memory}
---
# Current Observation
{obs}
---
# Instructions
Create a detailed multi-step plan that can be executed without needing new observations.
Your plan should anticipate likely scenarios and include contingencies.
Determine the optimal number of steps (1-5) based on the complexity of the task and available tools.
Use this format:
"plan": "Describe your overall strategy and reasoning",
"step_1": "First action with expected outcome",
"step_2": "Second action building on Step 1 (optional)",
"step_3": "Third action if needed (optional)",
"step_4": "Fourth action if needed (optional)",
"step_5": "Final action if needed (optional)",
"contingency": "What to do if things don't go as expected"
Only include the steps you need (step_1 is required, step_2 through step_5 are optional).
Set unused step fields to null. The plan should be comprehensive enough to execute
for multiple simulation steps without requiring new environmental observations.
Refer to available tools when planning actions.
---
"""
return system_prompt
[docs]
def plan(
self,
prompt: str | None = None,
obs: Observation | None = None,
ttl: int = 1,
selected_tools: list[str] | None = None,
tool_calls: str | None = "auto",
) -> Plan:
"""
Plan the next (ReWOO) action based on the current observation and the
agent's memory.
``selected_tools`` is forwarded to ``ToolManager.get_all_tools_schema()``.
Omitting it or passing ``None`` uses the default behavior of exposing
all tools, ``[]`` exposes no tools, and a non-empty list restricts
planning/execution to the named tools.
``tool_calls`` controls the execution-phase LiteLLM ``tool_choice``.
The planning pass still keeps tool use disabled with ``"none"``.
Supported values in Mesa-LLM are:
- ``None``: defer to LiteLLM/provider default behavior. In practice,
this usually means no tool calls when no tools are provided and
behavior similar to ``"auto"`` when tools are available.
- ``"none"``: never return tool calls; return a normal assistant
message instead.
- ``"auto"``: allow the model to either return a normal assistant
message or call one or more tools.
- ``"required"``: require the model to call one or more tools.
"""
# If we have remaining tool calls, skip observation and plan generation
if self.remaining_tool_calls > 0:
index_of_tool = (
len(self.current_plan.tool_calls) - self.remaining_tool_calls
)
self.remaining_tool_calls -= 1
tool_call = [self.current_plan.tool_calls[index_of_tool]]
current_plan = copy.copy(self.current_plan)
current_plan.tool_calls = tool_call
return Plan(llm_plan=current_plan, step=self.current_obs.step, ttl=ttl)
# If no prompt is provided, use the agent's default step prompt
if prompt is None:
if self.agent.step_prompt is not None:
prompt = self.agent.step_prompt
else:
raise ValueError("No prompt provided and agent.step_prompt is None.")
if obs is None:
self.current_obs = self.agent.generate_obs()
else:
self.current_obs = obs
llm = self.agent.llm
system_prompt = self.get_rewoo_system_prompt(self.current_obs)
rsp = llm.generate(
prompt=prompt,
tool_schema=self.agent.tool_manager.get_all_tools_schema(selected_tools),
tool_choice="none",
system_prompt=system_prompt,
)
self.agent.memory.add_to_memory(
type="plan", content={"content": rsp.choices[0].message.content}
)
rewoo_plan = self.execute_tool_call(
rsp.choices[0].message.content,
selected_tools=selected_tools,
ttl=ttl,
tool_calls=tool_calls,
)
# Count the number of tool calls in the response and set remaining_tool_calls
self.remaining_tool_calls = len(
getattr(rewoo_plan.llm_plan, "tool_calls", None) or []
)
self.current_plan = rewoo_plan.llm_plan
return rewoo_plan
[docs]
async def aplan(
self,
prompt: str | None = None,
obs: Observation | None = None,
ttl: int = 1,
selected_tools: list[str] | None = None,
tool_calls: str | None = "auto",
) -> Plan:
"""
Asynchronous version of plan() method for parallel planning.
``selected_tools`` follows the same contract as ``plan()``: omitting
it or passing ``None`` uses the default behavior of exposing all
tools, ``[]`` exposes no tools, and a non-empty list restricts
planning/execution to the named tools.
``tool_calls`` controls the execution-phase LiteLLM ``tool_choice``.
The planning pass still keeps tool use disabled with ``"none"``.
Supported values in Mesa-LLM are:
- ``None``: defer to LiteLLM/provider default behavior. In practice,
this usually means no tool calls when no tools are provided and
behavior similar to ``"auto"`` when tools are available.
- ``"none"``: never return tool calls; return a normal assistant
message instead.
- ``"auto"``: allow the model to either return a normal assistant
message or call one or more tools.
- ``"required"``: require the model to call one or more tools.
"""
# If we have remaining tool calls, skip observation and plan generation
if self.remaining_tool_calls > 0:
index_of_tool = (
len(self.current_plan.tool_calls) - self.remaining_tool_calls
)
self.remaining_tool_calls -= 1
tool_call = [self.current_plan.tool_calls[index_of_tool]]
current_plan = copy.copy(self.current_plan)
current_plan.tool_calls = tool_call
return Plan(llm_plan=current_plan, step=self.current_obs.step, ttl=ttl)
# If no prompt is provided, use the agent's default step prompt
if prompt is None:
if self.agent.step_prompt is not None:
prompt = self.agent.step_prompt
else:
raise ValueError("No prompt provided and agent.step_prompt is None.")
if obs is None:
self.current_obs = await self.agent.agenerate_obs()
else:
self.current_obs = obs
llm = self.agent.llm
system_prompt = self.get_rewoo_system_prompt(self.current_obs)
rsp = await llm.agenerate(
prompt=prompt,
tool_schema=self.agent.tool_manager.get_all_tools_schema(selected_tools),
tool_choice="none",
system_prompt=system_prompt,
)
await self.agent.memory.aadd_to_memory(
type="plan", content={"content": rsp.choices[0].message.content}
)
rewoo_plan = await self.aexecute_tool_call(
rsp.choices[0].message.content,
selected_tools=selected_tools,
ttl=ttl,
tool_calls=tool_calls,
)
# Count the number of tool calls in the response and set remaining_tool_calls
self.remaining_tool_calls = len(
getattr(rewoo_plan.llm_plan, "tool_calls", None) or []
)
self.current_plan = rewoo_plan.llm_plan
return rewoo_plan