#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4
#
# this file is part of 'RAX-AutoScaler'
#
# Copyright 2014 Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import common
import pyrax
import argparse
import time
import os
import sys
import logging.config
import random
from colouredconsolehandler import ColouredConsoleHandler
from auth import Auth
import cloudmonitor
import subprocess
from version import return_version
# CHECK logging.conf
logging_config = common.check_file('logging.conf')
if logging_config is None:
logging.handlers.ColouredConsoleHandler = ColouredConsoleHandler
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
else:
logging_conf_file = logging_config
logging.handlers.ColouredConsoleHandler = ColouredConsoleHandler
logging.config.fileConfig(logging_conf_file)
logger = logging.getLogger(__name__)
[docs]def exit_with_error(msg):
"""This function prints error message and exit with error.
:param msg: error message
:type name: str
:returns: 1 (int) -- the return code
"""
if msg is None:
try:
log_file = logger.root.handlers[0].baseFilename
logger.info('completed with an error: %s' % log_file)
except:
print ('(info) rax-autoscale completed with an error')
else:
try:
logger.error(msg)
log_file = logger.root.handlers[0].baseFilename
logger.info('completed with an error: %s' % log_file)
except:
print ('(error) %s' % msg)
print ('(info) rax-autoscale completed with an error')
exit(1)
[docs]def is_node_master(scalingGroup):
"""This function checks scaling group state and determines if node is a master.
:param scalingGroup: data about servers in scaling group retrieve from
cloudmonitor
:returns: 1 : if cluster state is unknown
2 : node is a master
None : node is not a master
"""
masters = []
node_id = common.get_machine_uuid()
if node_id is None:
logger.error('Failed to get server uuid')
return 1
sg_state = scalingGroup.get_state()
if len(sg_state['active']) == 1:
masters.append(sg_state['active'][0])
elif len(sg_state['active']) > 1:
masters.append(sg_state['active'][0])
masters.append(sg_state['active'][1])
else:
logger.error('Unknown cluster state')
return 1
if node_id in masters:
logger.info('Node is a master, continuing')
return 2
else:
logger.info('Node is not a master, nothing to do. Exiting')
return
[docs]def get_scaling_group(group, config_data):
"""This function checks and gets active servers in scaling group
:param group: group name
:param config_data: json configuration data
:returns: scalingGroup if server state is active else null
"""
group_id = common.get_group_value(config_data, group, 'group_id')
if group_id is None:
logger.error('Unable to get group_id from json file')
return
scalingGroup = cloudmonitor.scaling_group_servers(group_id)
if scalingGroup is None:
return
# Check active server(s) in scaling group
if len(scalingGroup.get_state()['active']) == 0:
return
else:
logger.info('Server(s) in scaling group: %s' %
', '.join(['(%s, %s)'
% (cloudmonitor.get_server_name(s_id), s_id)
for s_id in scalingGroup.get_state()['active']]))
logger.info('Current Active Servers: ' +
str(scalingGroup.get_state()['active_capacity']))
return scalingGroup
[docs]def autoscale(group, config_data, args):
"""This function executes scale up or scale down policy
:param group: group name
:param config_data: json configuration data
:param args: user provided arguments
"""
au = pyrax.autoscale
scalingGroup = get_scaling_group(group, config_data)
if scalingGroup is None:
return 1
check_type = common.get_group_value(config_data, group, 'check_type')
if check_type is None:
return 1
check_config = common.get_group_value(config_data, group, 'check_config')
if check_config is None:
return 1
for s_id in scalingGroup.get_state()['active']:
rv = cloudmonitor.add_cm_check(s_id, check_type, check_config)
logger.info('Cluster Mode Enabled: %s' % str(args['cluster']))
if args['cluster']:
rv = is_node_master(scalingGroup)
if rv is None:
# Not a master, no need to proceed further
return
if rv == 1:
# Cluster state unknown return error.
return 1
# Gather cluster statistics
check_type = common.get_group_value(config_data, group, 'check_type')
if check_type is None:
check_type = 'agent.load_average'
metric_name = common.get_group_value(config_data, group, 'metric_name')
if metric_name is None:
metric_name = '1m'
logger.info('Gathering Monitoring Data')
results = []
cm = pyrax.cloud_monitoring
# Get all CloudMonitoring entities on the account
entities = cm.list_entities()
# Shuffle entities so the sample uses different servers
entities = random.sample(entities, len(entities))
for ent in entities:
# Check if the entity is also in the scaling group
if ent.agent_id in scalingGroup.get_state()['active']:
ent_checks = ent.list_checks()
# Loop through checks to find checks of the correct type
for check in ent_checks:
if check.type == check_type:
data = check.get_metric_data_points(metric_name,
int(time.time())-300,
int(time.time()),
points=2)
if len(data) > 0:
point = len(data)-1
logger.info('Found metric for: ' + ent.name +
', value: ' + str(data[point]['average']))
results.append(float(data[point]['average']))
break
# Restrict number of data points to save on API calls
if len(results) >= args['max_sample']:
logger.info('--max-sample value of ' + str(args['max_sample']) +
' reached, not gathering any more statistics')
break
if len(results) == 0:
logger.error('No data available')
return 1
else:
average = sum(results)/len(results)
scale_up_threshold = common.get_group_value(config_data, group,
'scale_up_threshold')
if scale_up_threshold is None:
scale_up_threshold = 0.6
scale_down_threshold = common.get_group_value(config_data, group,
'scale_down_threshold')
if scale_down_threshold is None:
scale_down_threshold = 0.4
logger.info('Cluster average for ' + check_type +
'(' + metric_name + ') at: ' + str(average))
if average > scale_up_threshold:
try:
logger.info('Above Threshold - Scaling Up')
scale_policy_id = common.get_group_value(config_data, group,
'scale_up_policy')
scale_policy = scalingGroup.get_policy(scale_policy_id)
if not args['dry_run']:
common.webhook_call(config_data, group, 'scale_up', 'pre')
scale_policy.execute()
common.webhook_call(config_data, group, 'scale_up', 'post')
else:
logger.info('Scale up prevented by --dry-run')
logger.info('Scale up policy executed ('
+ scale_policy_id + ')')
except Exception, e:
logger.warning('Scale up: %s' % str(e))
elif average < scale_down_threshold:
try:
logger.info('Below Threshold - Scaling Down')
scale_policy_id = common.get_group_value(config_data, group,
'scale_down_policy')
scale_policy = scalingGroup.get_policy(scale_policy_id)
if not args['dry_run']:
common.webhook_call(config_data, group, 'scale_down', 'pre')
scale_policy.execute()
common.webhook_call(config_data, group, 'scale_down', 'post')
else:
logger.info('Scale down prevented by --dry-run')
logger.info('Scale down policy executed (' +
scale_policy_id + ')')
except Exception, e:
logger.warning('Scale down: %s' % str(e))
else:
logger.info('Cluster within target paramters')
[docs]def main():
"""This function validates user arguments and data in configuration file.
It calls auth class for authentication and autoscale to execute scaling
policy
"""
parser = argparse.ArgumentParser()
parser.add_argument('--as-group', required=False,
help='The autoscale group config ID')
parser.add_argument('--os-username', required=False,
help='Rackspace Cloud user name')
parser.add_argument('--os-password', required=False,
help='Rackspace Cloud account API key')
parser.add_argument('--config-file', required=False, default='config.json',
help='The autoscale configuration .ini file'
'(default: config.json)'),
parser.add_argument('--os-region-name', required=False,
help='The region to build the servers',
choices=['SYD', 'HKG', 'DFW', 'ORD', 'IAD', 'LON'])
parser.add_argument('--cluster', required=False,
default=False, action='store_true')
parser.add_argument('--version', action='version',
help='Show version number', version=return_version())
parser.add_argument('--dry-run', required=False, default=False,
action='store_true',
help='Do not actually perform any scaling operations '
'or call webhooks')
parser.add_argument('--max-sample', required=False, default=10, type=int,
help='Maximum number of servers to obtain monitoring '
'samples from')
args = vars(parser.parse_args())
# CONFIG.ini
config_file = common.check_file(args['config_file'])
if config_file is None:
exit_with_error("Either file is missing or is not readable: '%s'"
% args['config_file'])
# Show Version
logger.info(return_version())
for arg in args:
logger.debug('argument provided by user ' + arg + ' : ' +
str(args[arg]))
# Get data from config.json
config_data = common.get_config(config_file)
if config_data is None:
exit_with_error('Failed to read config file: ' + config_file)
# Get group
if not args['as_group']:
if len(config_data['autoscale_groups'].keys()) == 1:
as_group = config_data['autoscale_groups'].keys()[0]
else:
logger.debug("Getting system hostname")
try:
sysout = subprocess.Popen(['hostname'], stdout=subprocess.PIPE)
hostname = (sysout.communicate()[0]).strip()
if '-' in hostname:
hostname = hostname.rsplit('-', 1)[0]
group_value = config_data["autoscale_groups"][hostname]
as_group = hostname
except Exception, e:
logger.debug("Failed to get hostname: %s" % str(e))
logger.warning("Multiple group found in config file, "
"please use 'as-group' option")
exit_with_error('Unable to identify targeted group')
else:
try:
group_value = config_data["autoscale_groups"][args['as_group']]
as_group = args['as_group']
except:
exit_with_error("Unable to find group '" + args['as_group'] +
"' in " + config_file)
username = common.get_user_value(args, config_data, 'os_username')
if username is None:
exit_with_error(None)
api_key = common.get_user_value(args, config_data, 'os_password')
if api_key is None:
exit_with_error(None)
region = common.get_user_value(args, config_data, 'os_region_name')
if region is None:
exit_with_error(None)
session = Auth(username, api_key, region)
if session.authenticate() is True:
rv = autoscale(as_group, config_data, args)
if rv is None:
log_file = None
if hasattr(logger.root.handlers[0], 'baseFilename'):
log_file = logger.root.handlers[0].baseFilename
if log_file is None:
logger.info('completed successfull')
else:
logger.info('completed successfully: %s' % log_file)
else:
exit_with_error(None)
else:
exit_with_error('Authentication failed')
if __name__ == '__main__':
main()