# -*- coding: utf-8 -*-
"""EmergentCommunication.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Oke5DJuiKzRktEOvXVe5N3k5_e2zWglt
Authors: Ran Harari and Adi Amuzig
E-mails: [[email protected]](mailto:[email protected]),
[[email protected]](mailto:[email protected])
# Emergent Communication
Because the real world is dynamic and continually changing, many real-world scenarios demand coordination among different actors, and a centralized system overseeing all actions is not feasible in a large-scale environment.
Communication is quite beneficial in resolving this coordination challenge. In theory, we could simply transfer raw sensory data among robots, but that would be a large amount of mostly irrelevant information, because each unit perceives the world differently owing to its location and sensors.
The interaction between two agents with communication abilities follows a typical cycle with three levels: sensori-motor, conceptual, and language. The cycle is as follows:
1. **Grounding** - Both agents perceive the world and build a world model (sensori-motor level)
2. **Conceptualization** - The speaker determines which information is most useful to the listener for the mission to succeed and builds a semantic structure (conceptual level)
3. **Production** - Conveying the semantic structure as a message through verbal or implicit behaviors (language level)
4. **Comprehension** - The message is received and parsed by the listener into a semantic structure (language level)
5. **Interpretation** - Interpreting the semantic structure with reference to one's own world model and carrying out the appropriate action (conceptual level)
![Communication Cycle](images/emergent_com_theory.png)
[A Practical Guide to Studying Emergent Communication through Grounded Language Games - Nevens, Van Eecke and Beuls]
When it comes to communication, there are two primary options:
- A pre-defined communication protocol: all communication, and the actions that follow from it, are programmed into the system. Before the system goes live, every aspect of the communication cycle is fully specified.
- Emergent communication: the study of how a communication system emerges among previously non-communicating individuals. In other words, at least some components of the communication cycle must be learned. Note that emergent communication requires many interactions between the same agents in order to *learn* a functional communication system.
## The Importance of Communication
Communication is helpful in a multi-robot system for both robot-robot cooperation and human-robot collaboration. This session concentrates on the former.
To investigate the role of communication, we used the multitaxi domain to create a situation with no communication between the agents. As a baseline for our communication strategy, we first solved the multitaxi domain without communication and verified that it works.
"""
import math
from src.Communication.COM_net import COM_net
from src.agents.agent import DecisionMaker, Action_message_agent, Agent_Com, RandomDecisionMaker
from src.control.Controller_COM import DecentralizedComController
from src.decision_makers.planners.map_planner import AstarDM
from src.environments.env_wrapper import EnvWrappper, TAXI_pickup_dropoff_REWARDS
from multi_taxi import MultiTaxiEnv
MAP2 = [
    "+-------+",
    "| : |F: |",
    "| : | : |",
    "| : : : |",
    "| | :G| |",
    "+-------+",
]
MAP = [
    "+-----------------------+",
    "| : |F: | : | : | : |F: |",
    "| : | : : : | : | : | : |",
    "| : : : : : : : : : : : |",
    "| : : : : : | : : : : : |",
    "| : : : : : | : : : : : |",
    "| : : : : : : : : : : : |",
    "| | :G| | | :G| | | : | |",
    "+-----------------------+",
]
"""
Builds Multi_taxi env
"""
env = MultiTaxiEnv(num_taxis=2, num_passengers=2, domain_map=MAP, observation_type='symbolic')
env.seed(0)
obs = env.reset()
def build_multi_env(env):
    env.agents = env.taxis_names
    # print(f"{env.agents}\n")
    env.action_spaces = {
        agent_name: env.action_space for agent_name in env.agents
    }
    env.observation_spaces = {
        agent_name: env.observation_space for agent_name in env.agents
    }
    env.possible_agents = [agent for agent in env.agents]
    return EnvWrappper(env, env.agents)
environment = build_multi_env(env)
print('EnvironmentWrapper created')
"""
COM-agent - we will explain later
"""
class Heading_message_agent(Agent_Com):
    def __init__(self, decision_maker: AstarDM, sensor_function=None, message_filter=None, AgentName=None, bandW=math.inf, union_recieve=True):
        super().__init__(decision_maker, sensor_function, message_filter, AgentName, bandW, union_recieve)
        self.last_action = None

    # decides what data to transmit: (target passenger, remaining plan length)
    def set_data_func(self, obs):
        data = (self.decision_maker.taking_passenger, len(self.decision_maker.active_plan))
        return data

    # todo - implement your own set_recive_func
    def set_recive_func(self, obs, message):
        pass

    # saves the agent's last action - not necessary for the com module
    def set_last_action(self, action):
        self.last_action = action
"""
part 1 -
agents sends com but dont use it - A-star DM solves for each agent
"""
# with our com-A-star agent class in place, we can collect the env agents into a decentralized_agents dict
env_agents = environment.get_env_agents()
decentralized_agents = {
    agent_name: Heading_message_agent(
        AstarDM(env, single_plan=True, Taxi_index=int(agent_name[-1]), domain_map=MAP),
        AgentName=agent_name,
    )  # a different DM can be used, e.g. Agent(LearningDecisionMaker(env.action_space))
    for agent_name in env_agents
}
"""
COM-module - we will explain later
"""
com = COM_net()
"""
- MAC controller
"""
controller = DecentralizedComController(environment, decentralized_agents, com)
"""
activate
"""
controller.run(render=True, max_iteration=15)
print("Thats all - part 1")
"""In the above example, we can see that both agents intended to go to the same location to pick up the same passenger. The first agent to arrive at the passenger's location got to pick him up, while the other wasted effort and received nothing in return.
## Creating the Communication Infrastructer
In our solution we modeld the world as a POMDP usign the tuple $\langle S, s_{0}, A, P, R, \gamma, \Omega, O \rangle$ where
- $S$ is the set of all possible environment configurations,
- $s_{0}$ the initial state,
- $A$ the action space, is the list of actions the taxi agents can execute within the environment **including communication**,
- $P$ is a state transition probability function,
- $R$ is the reward function,
- $\gamma \in [0,1]$ is a discount factor,
- $\Omega$ is a set of observations where the **information form the observations also becomes an observation**, and
- $O$ is an observation function
## MAC-COM - simple to use:
<img src='images/MAC-com.jpg' width="800" height="800">
##### All you need for using com module:
- Implement Agent_com class (inherit from Agent_com)
for this agent:
1 - make sure to implement set_data_func - decides what data to transmit whenever it called
2 - recieve_func - implement yours to decide what to do with a recieved data
(defualt is the union_func that add the message data to the observation)
- implement COM module (network) - and use COM-controller
3 - defualt is all recieve the last messages as Joint_Message
<img src='images/MAC_COM_pic.jpg' width="500" height="500">
"""
environment = build_multi_env(env)
print('EnvironmentWrapper created')
"""
in order to use com module:
- implement Agent_com class (inherit from Agent_com
- make sure to implement set_data_func - that decides what is the data that the agent will transmit whenever it called
- u can implement your recieve_func - that decides what to do with a recieved data
defualt is the union_func that add the message data to the observation
"""
class Heading_message_agent(Agent_Com):
    def __init__(self, decision_maker, sensor_function=None, message_filter=None, AgentName=None, bandW=math.inf, union_recieve=True):
        super().__init__(decision_maker, sensor_function, message_filter, AgentName, bandW, union_recieve)
        self.last_action = None

    # transmit the agent's last action
    def set_data_func(self, obs):
        data = self.last_action
        return data

    # todo - implement your own set_recive_func (the default unions the message with the obs)
    # def set_recive_func(self, obs, message):
    #     pass

    # saves the agent's last action - not necessary for the com module
    def set_last_action(self, action):
        self.last_action = action
# with our com-agent class in place, we can collect the env agents into a decentralized_agents dict with a random DM
env_agents = environment.get_env_agents()
decentralized_agents = {
    agent_name: Heading_message_agent(RandomDecisionMaker(env.action_space), AgentName=agent_name)  # a different DM can be used, e.g. Agent(LearningDecisionMaker(env.action_space))
    for agent_name in env_agents
}
"""
Simple use of communication network -
- build one using COM_net() - defualt architecture is - all masseges sent to all other agents
*U can use more options - see at COM_net() class doc.
"""
com = COM_net()
"""
- initailze our new controller (DecentralizedComController) - using our env, our agents and our com module
- this controller will perform all joint action and message delieveries at any time-step
"""
controller = DecentralizedComController(environment, decentralized_agents, com)
"""
activate
"""
controller.run(render=True, max_iteration=3)
"""## Types of Communication
When it comes to communication, we may split it into two categories:
- **Explicit communication** – A direct, deliberate form of communication, where there is a clear associated intent for the transmitted information to be received by another agent or system over an established channel.
<img src='https://zanzidigital.co.uk/wp-content/uploads/2017/11/chatbot-image.jpg' width=700/>
- **Implicit communication** – Information is inferred, meaning This form of communication involves an action (or practical behaviour) representing as a message in itself, rather than a message being conveyed through language or codified gestures (such as a thumbs-up or a head nod).
<img src='https://www.robocup.org/system/leagues/images/000/000/001/list/soccer.png?1461148054' width=800/>
In the multiTaxi scenario, for example, implicit communication might take the form of the initial route each taxi takes (assuming that all agents are aware of each other's movements), but explicit communication can take the form of each taxi stating which passenger they want to pick up. There are other more explicit and implicit communication methods that might be used.
The agents in the last example employ **explicit communication** to relay a message to the other agents. They specify which passenger they are going to in the message. We have completed the first three steps of the communication cycle, which means that the agents still do not know how to use communication to their benefit, nor do they know when to communicate.
In the following example - we use a simple rule-based (grounded communication rule) in order to 'wisely-use' other taxis messages: if someones has a shortest path to handle your target passenger, switch to other passenger.
"""
"""
part 2 -
com_emerge_use of message for getting better + introducing com-module
"""
from src.decision_makers.planners.MA_com_planner import Astar_message_DM
"""
Builds Multi_taxi env
"""
m = MAP
env = MultiTaxiEnv(num_taxis=3, num_passengers=5, domain_map=m, observation_type='symbolic',rewards_table=TAXI_pickup_dropoff_REWARDS ,option_to_stand_by=True)
obs = env.reset()
environment = build_multi_env(env)
print('EnvironmentWrapper created')
# an agent class that communicates and heads towards one passenger (pickup -> dropoff)
"""
In order to use the com module:
- implement an Agent_Com class (inherit from Agent_Com)
- make sure to implement set_data_func - decides what data the agent transmits whenever it is called
- you can implement your own set_recive_func - decides what to do with received data
  (the default is the union function, which adds the message data to the observation)
"""
class Heading_message_agent(Agent_Com):
    def __init__(self, decision_maker: Astar_message_DM, sensor_function=None, message_filter=None, AgentName=None, bandW=math.inf, union_recieve=False):
        super().__init__(decision_maker, sensor_function, message_filter, AgentName, bandW, union_recieve)
        self.last_action = None
        self.last_message = None

    # transmit (target passenger, remaining plan length)
    def set_data_func(self, obs):
        data = (self.decision_maker.taking_passenger, len(self.decision_maker.active_plan))
        return data

    # our own set_recive_func - hand the message to the decision maker
    def set_recive_func(self, obs, message):
        self.last_message = message
        self.decision_maker.save_last_message(message)
        # self.decision_maker.updateplan_message(message)

    # saves the agent's last action - not necessary for the com module
    def set_last_action(self, action):
        self.last_action = action
# with our com-A-star agent class in place, we can collect the env agents into a decentralized_agents dict
env_agents = environment.get_env_agents()
decentralized_agents = {
    agent_name: Heading_message_agent(
        Astar_message_DM(env, single_plan=True, Taxi_index=int(agent_name[-1]), domain_map=m),
        AgentName=agent_name,
    )  # a different DM can be used
    for agent_name in env_agents
}
"""
Simple use of communication network -
- build one using COM_net() - defualt architecture is - all masseges sent to all other agents
*U can use more options - see at COM_net() class doc.
"""
com = COM_net()
"""
- initailze our new controller (DecentralizedComController) - using our env, our agents and our com module
- this controller will perform all joint action and message delieveries at any time-step
"""
controller = DecentralizedComController(environment, decentralized_agents, com)
"""
activate
"""
# communicate first
controller.send_recieve()
# run (communication happens inside, after each time step)
controller.run(render=True, max_iteration=20, reset=True)
print("That's all - part 2")
"""## Using Communication to Our Advantage
Having established that the agents can send and receive messages, they must now learn how to utilize them and what to do with them. They are pointless if they do not behave in line with the massages. In emergent communication we can learn how to do it autonomously
"""
"""
part 2.2 -
agents learn now - hierarchical_tasks
"""
# import hierarchical_tasks multi taxi env
from src.environments.hirarchical_Wrapper import Multi_Taxi_Task_Wrapper
from src.decision_makers.planners.Com_High_level_Planner import Astar_message_highlevel_DM
"""
Builds Multi_taxi env
"""
m = MAP
env = MultiTaxiEnv(num_taxis=3, num_passengers=5, domain_map=m, observation_type='symbolic',rewards_table=TAXI_pickup_dropoff_REWARDS ,option_to_stand_by=True)
obs = env.reset()
environment = build_multi_env(env)
environment = Multi_Taxi_Task_Wrapper(environment)
print('EnvironmentWrapper Multi_Taxi_Task_Wrapper created')
# an agent class that communicates and heads towards one passenger (pickup -> dropoff)
class Heading_message_agent(Agent_Com):
    def __init__(self, decision_maker: Astar_message_highlevel_DM, sensor_function=None, message_filter=None, AgentName=None, bandW=math.inf, union_recieve=False):
        super().__init__(decision_maker, sensor_function, message_filter, AgentName, bandW, union_recieve)
        self.last_action = None
        self.last_message = None

    # transmit (target passenger, remaining plan length)
    def set_data_func(self, obs):
        data = (self.decision_maker.taking_passenger, len(self.decision_maker.active_plan))
        return data

    # our own set_recive_func - hand the message to the decision maker
    def set_recive_func(self, obs, message):
        self.last_message = message
        self.decision_maker.save_last_message(message)
        # self.decision_maker.updateplan_message(message)

    # saves the agent's last action - not necessary for the com module
    def set_last_action(self, action):
        self.last_action = action
# run again - high level
env_agents = environment.get_env_agents()
decentralized_agents = {
    agent_name: Heading_message_agent(
        Astar_message_highlevel_DM(env, single_plan=True, Taxi_index=int(agent_name[-1]), domain_map=m),
        AgentName=agent_name,
    )  # a different DM can be used
    for agent_name in env_agents
}
controller = DecentralizedComController(environment, decentralized_agents, com)
"""
activate
"""
# communicate first
controller.send_recieve()
# run (communication happens inside, after each time step)
controller.run(render=True, max_iteration=250, reset=True)
# show rewards: accumulate the per-step reward dicts into per-agent and overall totals
totals = {}
total = 0
for step_rewards in controller.total_rewards:
    for agent_name, value in step_rewards.items():
        totals[agent_name] = totals.get(agent_name, 0) + value
        total += value
print(f"----------------------------------\n total reward of all agents: {total}, {totals} \n----------------------------------")
print("That's all - part 2.2")
"""We can see a major performence improvement - this method can easily solve a wide, Multi-Agent domain, using low-bandwidth com.
After we thought about the solution of the 5th and final step ('Interpretation') by ourself, We can design a RL Agent that can learn how to interpret and act by it own.
#### PPO RL agent learn how to act in the high-level action space (X - handling passenger x, 0 - stby)
we show how high-level single agent can learn how to act.
further research - learning what to communicate (cooperative/adverserial), and learning how to interpret-act acorrding to other agents 'signals'
"""
"""
part 3 -
agents learn now - high level PPO, low-level a-star
"""
from src.decision_makers.High_level_learner import LearningHighLevelDecisionMaker
env = MultiTaxiEnv(num_taxis=1, num_passengers=2, domain_map=MAP2, observation_type='symbolic', option_to_stand_by=True)
env.agents = env.taxis_names
env = EnvWrappper(env, env.agents)
env = Multi_Taxi_Task_Wrapper(env)
obs = env.reset()
D_M = LearningHighLevelDecisionMaker(env.action_space)
obs = env.reset()
env.env.env.render()
for i in range(50):
    print(f"obs:{type(obs)}")
    a = D_M.get_action(obs)
    print(f"next action: {env.index_action_dictionary[a]}")
    obs, r, done, info = env.step(a)
    env.env.env.render()
    if done:
        break
print("That's all - part 3")
"""## Compilling Communication for RL as an Action + Com-Reward
To explain implicit communication we will look at the Speaker Lister environment. In this environment there are 2 agents in which one agent (speaker) has information about the goal and has a limmited mode of communication with the second agent (listener) must use the speaker's communications and its limitted observations to navigate a 2D space toward the goal. The speaker agent cannot navigate, and the listener object cannot communicate.
We used this setting to implement our emergent communication in the form of deciding when to 'communicate' - SPEAKER will allways 'point' on the right target, Listener should learn when to use the action to observe the Speaker's pointer (or in other wards, Listener learns when to ask for help.
#### speaker-listener com as action - env-wrapper
#### speaker - 'point-to-target' if asked, listener can ask for 'help' from speaker (action 0 - no move)
<img src='images/seaker-point.jpg' width="500" height="500">
##### Listener new-wrapper :
The speaker observation is of type `Box(-inf, inf, (9,), float32)`
1. listener agent velocity X
2. listener agent velocity Y
3. red landmark X pos - listener agent X pos
4. red landmark Y pos - listener agent Y pos
5. blue landmark X pos - listener agent X pos
6. blue landmark Y pos - listener agent Y pos
7. green landmark X pos - listener agent X pos
8. green landmark Y pos - listener agent Y pos
9. communication channel - if com action was choosen last - speaker tells destination (1/2/3) else - 0
##### Discrete ACTIONS:
* 0 - do nothing and ask for communicate (com penalty)
* 1 - push left (add velocity in negative x-axis direction)
* 2 - push right (add velocity in positive x-axis direction)
* 3 - push down (add velocity in negative y-axis direction)
* 4 - push up (add velocity in positive t-axis direction)
"""
import numpy as np
from gym import Wrapper
from copy import deepcopy
from pettingzoo.mpe import simple_speaker_listener_v3
from stable_baselines3 import PPO
# speaker action-to-index dict
SPEAKER_DISCRETE_ACTIONS = {
    'A': 0,
    'B': 1,
    'C': 2,
    'nothing': 3,
}
# listener action-to-index dict
LISTENER_DISCRETE_ACTIONS = {
    'nothing': 0,
    'left': 1,
    'right': 2,
    'down': 3,
    'up': 4
}
from gym.spaces import Discrete, Box
"""#### Custom gym Wrapper
"""
class ListenerOnlyCOMWrapper(Wrapper):
    def __init__(self, env, com_allways_on=False, com_reward=-0.1):
        super().__init__(env)
        self.com_allways_on = com_allways_on
        self.com_reward = com_reward
        # reset to skip the speaker before a new game
        self.obs = self.reset()
        # set a single-agent list (listener only)
        self.agents = self.agents[1:]
        self.observation_space = self.get_observation_space()
        # set the action space (listener)
        self.action_space = env.action_spaces[self.agents[0]]

    def get_observation_space(self):
        return Box(low=-np.inf, high=np.inf, shape=(len(self.obs),), dtype=np.float32)

    def reset(self):
        super().reset()
        # skip the speaker action
        self.__step_speaker()
        self.obs, _, _, _ = self.env.last()
        self.obs = self.fix_obs(self.obs, self.com_allways_on)
        return self.obs

    def step(self, action):
        # CHANGE ACTION AS NEEDED - action 0 means "ask for communication"
        com = action == 0
        super().step(action)
        ob, _, done, _ = self.env.last()  # do the listener action
        # skip the speaker action - if com==True the speaker 'speaks', else a 0-speak is performed
        self.__step_speaker()
        step_rets = self.env.last()
        if self.com_allways_on:
            com = True
        self.obs = self.fix_obs(step_rets[0], com)
        reward_com = 0
        if com and not self.com_allways_on:
            # pay the communication penalty for com use - TODO adjust reward if needed
            reward_com = self.com_reward
        return (self.obs, step_rets[1] + reward_com, step_rets[2], step_rets[3])

    def fix_obs(self, obs, com):
        if len(obs) < 5:
            return obs
        # trim the observation so the last entry holds the speaker's pointer
        obs = np.array(obs[:-2])
        point = int(obs[-1])
        if not com or point == 3:
            obs[-1] = 0
            return obs
        try:
            x = obs[2 + point * 2]
            y = obs[3 + point * 2]
        except IndexError:
            # pointer index out of the observation bounds - treat as no communication
            print('out of bounds')
            obs[-1] = 0
            return obs
        # encode the direction to the target landmark as one of eight 45-degree sectors (1..8)
        myradians = math.atan2(y, x)
        deg = int(math.degrees(myradians))
        if deg < 0:
            deg += 360
        output = deg // 45
        obs[-1] = output + 1
        return obs

    def __step_speaker(self):
        goal_color, _, done, _ = self.env.last()
        # the speaker is done before the listener
        if done:
            return
        # step with the correct action type
        if self.env.unwrapped.continuous_actions:
            super().step(goal_color)
        else:
            super().step(np.argmax(goal_color))
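"""
A quick sanity check of the sector encoding used in fix_obs (illustrative values only):
a landmark at relative (x, y) = (1.0, 1.0) lies at 45 degrees, which falls in sector
45 // 45 = 1 and is therefore encoded as 1 + 1 = 2 on the communication channel.
"""
_deg = int(math.degrees(math.atan2(1.0, 1.0)))
if _deg < 0:
    _deg += 360
print(f'encoded channel value: {_deg // 45 + 1}')  # -> 2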
env = simple_speaker_listener_v3.env(max_cycles=200, continuous_actions=False)
env = ListenerOnlyCOMWrapper(env, com_allways_on=False)
print(f'custom wrapped environment: {env}')
# train a PPO agent
def train_model(env):
    env_copy = deepcopy(env)
    env_copy.reset()
    model = PPO("MlpPolicy", env_copy, verbose=1)
    for batch in range(200):
        print(f"batch: {batch}")
        model.learn(total_timesteps=8000, n_eval_episodes=40)
        env_copy.reset()
    return model
def get_ppo_agent(is_com_active):
    if is_com_active:
        model_file_name = 'S-L_PPO_base'
    else:
        model_file_name = 'S-L_PPO_com10'
    # load a saved model if one exists, otherwise train and save a new one
    try:
        model = PPO.load(model_file_name)
    except (FileNotFoundError, ValueError):
        model = train_model(env)
        model.save(model_file_name)
    return model
# run n_iter episodes after training, with rendering
def run(env, model, n_iter):
    for m in range(n_iter):
        observation = env.reset()
        env.render()
        for i in range(env.unwrapped.max_cycles):
            # choose an action and execute it
            action = model.predict(observation)
            action = action[0]
            print(f'step {i}')
            print(f'observation: {observation}')
            observation, reward, done, info = env.step(action)
            # log everything
            c = "no_com"
            if action == 0 or env.com_allways_on:
                c = "com active"
            print(f'action: {action} com:{c}')
            print(f'reward: {reward}')
            print()
            # if done, the episode is complete; no more actions can be taken
            if done:
                break
            env.render()
    env.close()
model = get_ppo_agent(env.com_allways_on)
run(env,model,5)
print("that all - part 4")
"""## Method Weaknesses, Issues and Failures
- Adding complexity, to a complexed problem. 5 stages, all must be reasonable for a good communication model
- sending the wrong message or too much information can result in worse performence
- Does not worth the effort when a central 'brain' available
- Implicit COM - domain knowlege (and ussualy obs space of other agents) dependant
- Communication (explicit) is exposed to Noise, Jamming, and Spoofing
"""