"""
Define the Instance class, which represents the problem we are facing, with its data, information,
parameters. Provide Instance-related methods.
"""
import math
from . import init as it
from . import node as nd
class Instance:
    """Description of a problem instance.

    :param df_indiv: Container-level data loaded by ``init.init_dfs``,
        sorted by tick and indexed by (tick, container ID); the index
        columns are also kept as regular columns
    :type df_indiv: pd.DataFrame
    :param df_host: Host-level data loaded by ``init.init_dfs``, sorted by
        tick and indexed by (tick, host ID); the index columns are also
        kept as regular columns
    :type df_host: pd.DataFrame
    :param df_host_meta: Host metadata loaded by ``init.init_dfs``; used to
        count the nodes and to build ``dict_id_n``
    :type df_host_meta: pd.DataFrame
    :param time: Number of distinct ticks found in ``df_indiv``
    :type time: int
    :param nb_nodes: Number of distinct hosts found in ``df_host_meta``
    :type nb_nodes: int
    :param nb_containers: Number of distinct containers found in
        ``df_indiv``
    :type nb_containers: int
    :param nb_clusters: Number of clusters, read from
        ``config['clustering']['nb_clusters']``
    :type nb_clusters: int
    :param window_duration: Analysis window length, in ticks (see
        :meth:`percentage_to_timestamp`)
    :type window_duration: int
    :param sep_time: Separation tick derived from
        ``config['analysis']['sep_time']`` (see
        :meth:`percentage_to_timestamp`)
    :type sep_time: int
    :param dict_id_n: Mapping returned by ``node.build_dict_id_nodes`` for
        the host metadata (exact content defined by that helper)
    :type dict_id_n: Dict
    :param dict_id_c: Container mapping; created empty by the constructor
    :type dict_id_c: Dict
    """

    def __init__(self, path, config):
        """Instance constructor

        Load the input dataframes, normalise their types and indexes,
        convert the percentage-based configuration into tick values and
        write the instance summary to the results file.

        :param path: Filesystem path to the input files
        :type path: str
        :param config: Configuration dict from config file; its
            ``['loop']['tick']`` entry is rewritten in place by
            :meth:`percentage_to_timestamp`
        :type config: Dict
        """
        self.df_indiv, self.df_host, self.df_host_meta = it.init_dfs(path)

        # Sizes are measured on the frames as loaded, before type casting.
        self.time: int = self.df_indiv[it.tick_field].nunique()
        self.nb_nodes = self.df_host_meta[it.host_field].nunique()
        self.nb_containers = self.df_indiv[it.indiv_field].nunique()
        self.nb_clusters = config['clustering']['nb_clusters']

        # Identifiers become strings and ticks integers; each time series is
        # then ordered by tick and indexed by (tick, id). drop=False keeps
        # the index columns available as ordinary columns too.
        self.df_indiv = (
            self.df_indiv
            .astype({
                it.indiv_field: str,
                it.host_field: str,
                it.tick_field: int,
            })
            .sort_values(it.tick_field)
            .set_index([it.tick_field, it.indiv_field], drop=False)
        )
        self.df_host = (
            self.df_host
            .astype({
                it.host_field: str,
                it.tick_field: int,
            })
            .sort_values(it.tick_field)
            .set_index([it.tick_field, it.host_field], drop=False)
        )
        self.df_host_meta = self.df_host_meta.astype({it.host_field: str})

        # Must run before print(): it defines window_duration and sep_time.
        self.percentage_to_timestamp(config)

        self.dict_id_n = nd.build_dict_id_nodes(self.df_host_meta)
        self.dict_id_c = {}

        self.print()

    def __str__(self):
        """Return the instance summary written by :meth:`print`.

        :return: Multi-line report (sizes and parameters), without a
            trailing newline
        :rtype: str
        """
        # Express the window as a real percentage of the total time; an
        # empty instance (no tick at all) is reported as 0% rather than
        # failing on a division by zero.
        if self.time:
            window_percent = 100 * self.window_duration / self.time
        else:
            window_percent = 0.0
        return ''.join([
            '### Problem instance informations ###\n',
            'Time considered : %d\n' % self.time,
            '%d nodes -- ' % self.nb_nodes,
            '%d containers\n' % self.nb_containers,
            '\n### Parameters ###\n',
            'clusters : %d\n' % self.nb_clusters,
            'tau : %d (%f%%)' % (self.window_duration, window_percent),
        ])

    def print(self):
        """Print Instance information.

        Write the summary built by :meth:`__str__`, followed by a blank
        line, to ``init.results_file``.
        """
        it.results_file.write('%s\n\n' % self)

    def print_times(self, tick):
        """Print time informations.

        :param tick: Tick value displayed on the ``Ticks`` line
        :type tick: int
        """
        print('Total time : ', self.time)
        print('Window duration : ', self.window_duration)
        print('Separation time : ', self.sep_time)
        print('Ticks : ', tick)

    def _percent_of_time(self, percent):
        """Convert a percentage of the total time into a number of ticks.

        :param percent: Percentage of ``self.time`` (anything ``int()``
            accepts)
        :type percent: int or str
        :return: ``floor(self.time * percent / 100)``
        :rtype: int
        """
        return math.floor(self.time * int(percent) / 100)

    def percentage_to_timestamp(self, config):
        """Transform percentage config time to timestamp.

        Set ``self.window_duration`` and ``self.sep_time`` from the
        percentages found in ``config['analysis']``, and replace
        ``config['loop']['tick']`` in place by its value in ticks.

        :param config: Configuration dict; reads
            ``['analysis']['window_duration']``, ``['analysis']['sep_time']``
            and ``['loop']['tick']`` (a percentage, or ``'default'``)
        :type config: Dict
        """
        # TODO consider 'tick' param as absolute, not percent ?
        self.window_duration = self._percent_of_time(
            config['analysis']['window_duration'])

        # The separation time is an actual tick value: offset the number of
        # ticks from the first tick present in the data.
        first_tick = self.df_indiv[it.tick_field].min()
        sep_nb_data = self._percent_of_time(config['analysis']['sep_time'])
        self.sep_time = first_tick + sep_nb_data - 1

        loop_cfg = config['loop']
        if loop_cfg['tick'] == 'default':
            # Uses the window duration as computed above, i.e. before the
            # minimum-size adjustment below.
            loop_cfg['tick'] = self.window_duration - 1
        else:
            loop_cfg['tick'] = self._percent_of_time(loop_cfg['tick']) - 1

        # Keep every value usable on very short instances.
        if self.window_duration <= 1:
            self.window_duration += 1
        if self.sep_time <= 0:
            self.sep_time = 1
        if loop_cfg['tick'] <= 0:
            loop_cfg['tick'] = 1

    def get_node_from_container(self, container_id):
        """Get node ID from container ID.

        The host of the first row of ``df_indiv`` (ordered by tick) that
        matches the container is returned.

        :param container_id: Container ID to look up
        :type container_id: str
        :raises IndexError: If no row of ``df_indiv`` matches
            ``container_id``
        :return: Host ID found on the first matching row
        :rtype: str
        """
        is_container = self.df_indiv[it.indiv_field] == container_id
        hosts = self.df_indiv.loc[is_container, it.host_field].to_numpy()
        if hosts.size == 0:
            # Same exception type as indexing the empty array, but with a
            # message that names the missing container.
            raise IndexError(
                'no data found for container %r' % (container_id,))
        return hosts[0]