forked from sarah-keren/MAC
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMAC_CF_plan_check.py
162 lines (119 loc) · 13.8 KB
/
MAC_CF_plan_check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/env python
# coding: utf-8
# **Multi-Agent Control**
# The purpose of this library is to support different forms of multi-agent control.
# The (current) focus is on setting with a single thread : a single thread collects the actions to be performed by each agent (the joint actions) and performs it.
#
# The difference between the different approaches to control is in the agent that decides which action each agent performs. As can be seen in the image below, the two extreme cases are fully-centralized settings, in which a single agent (i.e. the controller) decides which action each agent performs (think: dictatorship), and fully-decentralized settings, in which each agent makes its own decision on how to behave (think: anarchy).
#
# ![control-specturm.png]()
# The framework is comprised of three main components:
#
# * **Control** supports the different approaches to control. It includes a single
# generic class named Controller that includes the thread that runs the systems, iteratively collects the joint actions from the agents, and executes them. The way joint actions are computed differs between the implementation of the sub-classes.
# * **Agents** supports different AI approaches for decision making (e.g., planning, RL etc). It contains an Agent class that is initialized with the agent's DecisionMaker (that represents the decision making procedure that maps states to actions) and the sensor function of the agent that maps the current state of the world to the agent's observation of it.
# * **Environments** includes an interface class named env_wrapper to the environments in which the agents operate.
#
#
# ---
#
# Let's install the Multi Agent Control (MAC) library as well as the Multi-Taxi environment, on which we will demonstrate the different control approaches.
#
#
#
# In[1]:
# don't need this with conda env
# !pip install git+https://github.com/sarah-keren/MAC
# !pip install git+https://github.com/sarah-keren/multi_taxi
# Since the focus is on demonstrating the different control dynamics and not on the decision making process, we offer an example in which decision making is random - at each state a random action is selected. We will demonstrate this in a fully centralized and decentralized setting. Let's import the relevant classes.
#
# In[6]:
from multi_taxi.env.state import MultiTaxiEnvState
from multi_taxi.utils.types import Event
from src.control.controller_decentralized import DecentralizedController
from src.control.controller_centralized import CentralizedController
from src.agents.agent import Agent, RandomDecisionMaker
from src.decision_makers.Deap_learner import LearningDecisionMaker, SinglePassengerPosWrapper, SingleTaxiWrapper, TaxiObsPrepWrapper
from src.decision_makers.PPO_DM1 import PPODecisionMaker
from src.decision_makers.Short_Term_Planner_DM import ST_Planner
from src.environments.env_wrapper import EnvWrappper, EnvWrappperGym, env_pos_change
import matplotlib.pyplot as plt
from multi_taxi import multi_taxi_v0 as TaxiEnv
from multi_taxi import ObservationType
from multi_taxi import wrappers as WP
from src.decision_makers.MA_decision_makers.MA_AddOn_DM import *
from agents import BCAgent
# some helpful functions
def man_dist(point_a, point_b):
    """Return the Manhattan (L1) distance between two 2D points.

    Only the first two coordinates of each point are read, so any indexable
    object (tuple, list, array) holding at least two numbers is accepted.

    Args:
        point_a: First point, indexable as ``point_a[0]`` and ``point_a[1]``.
        point_b: Second point, indexable the same way.

    Returns:
        ``|a0 - b0| + |a1 - b1|``.
    """
    delta_first = abs(point_a[0] - point_b[0])
    delta_second = abs(point_a[1] - point_b[1])
    return delta_first + delta_second
# Conflict function over the state: is some other taxi close to the named taxi?
def cf_close_to_other(state: MultiTaxiEnvState, name, dist=2, **kwargs):
    """Report whether any other taxi is within ``dist`` of the taxi ``name``.

    Args:
        state: Environment state; its ``taxi_by_name`` mapping is read, and
            each taxi's ``location`` is used as a 2D position.
        name: Key of the taxi to check in ``state.taxi_by_name``.
        dist: Largest Manhattan distance that still counts as "close"
            (the comparison is inclusive).
        **kwargs: Accepted and ignored, so callers may pass extra keyword
            arguments without this function failing.

    Returns:
        True if at least one taxi other than ``name`` is at Manhattan distance
        ``<= dist`` from it, otherwise False (including when ``name`` is the
        only taxi).
    """
    taxis = state.taxi_by_name
    my_pos = taxis[name].location
    return any(
        man_dist(my_pos, other_taxi.location) <= dist
        for other_name, other_taxi in taxis.items()
        if other_name != name  # never compare the taxi with itself
    )
# Now, let's create a multi-taxi environment and its MAC wrapper.
# env = TaxiEnv.parallel_env(num_taxis=4, num_passengers=3, pickup_only=True, observation_type='symbolic', render_mode='human')
# Parallel multi-taxi environment: two taxis, two passengers, pickup-only,
# mixed observations, rendered for a human viewer.
env = TaxiEnv.parallel_env(
    num_taxis=2,
    num_passengers=2,
    pickup_only=True,
    observation_type=ObservationType.MIXED,
    # NOTE(review): presumably one entry per taxi, with None meaning an
    # unrestricted field of view - confirm against the multi_taxi docs.
    field_of_view=[None, None],
    render_mode='human',
)
# using FIXED places wrappers to set start locations
# env = WP.FixedPassengerStartLocationsWrapper(env, 2, 4, 6, 4)
# env = WP.FixedTaxiStartLocationsWrapper(env, 2,1,2,7)
# # changing reward table for collision
# for k in env.unwrapped.reward_table.values():
# k[Event.COLLISION] = -50
# reset and render to check
# obs = env.reset()
# env.render()
# Wrap the raw environment in the MAC wrapper, making sure env.agents holds
# the taxi names. If the first attempt fails (presumably because `env.agents`
# is not populated before the first reset - TODO confirm), fall back to
# `env.possible_agents` and wrap again.
try:
    env = EnvWrappper(env, env.agents)
except Exception:
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate; every ordinary failure keeps the fallback.
    env.agents = env.possible_agents
    env = EnvWrappper(env, env.agents)

env.reset()
print('EnvironmentWrapper created')
# ## Decentralized Control
#
# Let's create a decentralized control setting, in which each agent randomly chooses the action to perform.
#
#
# Maps each agent name to the Agent object that decides its actions.
decentralized_agents = {}
# # first we set random DM agents:
# decentralized_agents = {agent_name: Agent(RandomDecisionMaker(env.env.action_spaces[agent_name])) #can use diffrent DM
#                         for agent_name in env.env.agents}
# # all PPO agents
# decentralized_agents = {agent_name: Agent(PPODecisionMaker(env.env.action_spaces[agent_name],env.env,agent_name))
#                         for agent_name in env.env.agents}
# ---------------------------------------------------------------------------------------------------------
# Conflict_Function - we set a conflict function to help each agent decide whether to use the MA DM or its own DM
# ---------------------------------------------------------------------------------------------------------
# Create all agents using MA_prune_DM: if the cf is on, the MA DM prunes the worst MA action and the remaining
# actions are handed back to the agent's own DM; if the cf is off, the agent uses its own original DM.
# stohastic_action - if on, adds a patience feature that decides stochastically when the agent holds still for
# MA reasons; the longer it waits, the more patience it loses and the more likely it is to use its own DM.
for agent_name in env.env.agents:
    ag_name = agent_name
    # cf1 = Conflict_Function(None, cf_close_to_other,ag_name,True, man=5, dist=3)
    # Binary, state-based conflict function: fires when another taxi is
    # within Manhattan distance 4 of this agent's taxi.
    cf1 = Conflict_Function(func_obs=None, func_state=cf_close_to_other, name=ag_name, is_binary=True, dist=4)
    dm = MA_prune_DM(my_DM=BCAgent(env, ag_name, '2_passenger_bc'),
                     MA_DM=pickup_DM(env.env.action_spaces[ag_name]),
                     conflict_function=cf1,
                     action_space=env.env.action_spaces[ag_name],
                     prune_MA_flag=True,
                     AgentName=ag_name,
                     stohastic_action=True)
    ag = Agent(decision_maker=dm, AgentName=ag_name)
    decentralized_agents[ag_name] = ag
# Each agent chooses its own action; the stock MAC DecentralizedController
# gathers them and drives the environment, rendering every step, for up to
# 60 iterations.
controller = DecentralizedController(env, decentralized_agents)
controller.run(
    render=True,
    max_iteration=60,
)
# %%