[prev in list] [next in list] [prev in thread] [next in thread]
List: linux-ha-dev
Subject: Re: [Linux-ha-dev] Patch for mysql RA
From: Marek Marczykowski <marmarek () staszic ! waw ! pl>
Date: 2010-06-29 16:53:23
Message-ID: 20100629165323.GT27752 () boss ! staszic ! waw ! pl
[Download RAW message or body]
[Attachment #2 (multipart/signed)]
[Attachment #4 (multipart/mixed)]
On Tue, Jun 29, 2010 at 05:41:08PM +0200, Dejan Muhamedagic wrote:
> Hi,
Hi,
> On Tue, Jun 29, 2010 at 12:45:47AM +0200, Marek Marczykowski wrote:
> > I'm implementing some HA solution using pacemaker and I've made some
> > changes to mysql RA. Maybe you get interested in some of them. Patch
> > attached. List of changes:
> > * [bugfix] monitor return $OCF_RUNNING_MASTER on master
> > * [bugfix] slave info collected with replication user
> > * [bugfix] cut ending space from OCF_* host lists
> > * [doc] suggest --skip-slave-start option
> > * [feature] detailed logging on errors
> > * [feature] setup replication on late slave start
> > * [feature] another concept of M/S replication - try to keep state
>
> Could we please have this split into as many patches as there are
> unrelated changes (looks like there should be 7). Otherwise it's
> going to be difficult to see what's affected by which part of the
> patch.
Ok, I've splitted it into 8 patches (some typo fix missing in the list
above). Patches attached and also uploaded here:
http://marmarek.w.staszic.waw.pl/patches/ha-mysql-ra
--
Best Regards,
Marek Marczykowski | gg:2873965 | RLU #390519
marmarek at staszic waw pl | xmpp:marmarek at staszic waw pl
["01_mysql-ra-wrong-assigment.patch" (text/plain)]
--- mysql.orig 2010-06-29 17:45:14.390077677 +0200
+++ mysql 2010-06-29 18:07:44.399141260 +0200
@@ -458,7 +458,7 @@
master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
if [ $master_pref -lt 0 ]; then
# Sanitize a below-zero preference to just zero
- $master_pref=0
+ master_pref=0
fi
$CRM_MASTER -v $master_pref
fi
["02_mysql-ra-trailing-spaces.patch" (text/plain)]
--- mysql-01 2010-06-29 18:13:35.738106899 +0200
+++ mysql 2010-06-29 18:16:52.899101522 +0200
@@ -811,7 +811,7 @@
# connect to it and wait for it to start replicating.
local master_host
local master_status
- master_host=$OCF_RESKEY_CRM_meta_notify_promote_uname
+ master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "`
if ( ! mysql_status ); then
return $OCF_NOT_RUNNING
@@ -834,7 +834,8 @@
# The master has completed its promotion. Now is a good
# time to check whether our replication slave is working
# correctly.
- if [ $OCF_RESKEY_CRM_meta_notify_promote_uname = `uname -n` ]; then
+ master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "`
+ if [ "$master_host" = `uname -n` ]; then
ocf_log info "Ignoring post-promote notification for my own promotion."
return $OCF_SUCCESS
fi
@@ -842,10 +843,12 @@
-e 'START SLAVE';
;;
'post-demote')
- if [ $OCF_RESKEY_CRM_meta_notify_demote_uname = `uname -n` ]; then
+ demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
+ if [ $demote_host = `uname -n` ]; then
ocf_log info "Ignoring post-demote notification for my own demotion."
return $OCF_SUCCESS
fi
+ ocf_log info "post-demote notification for $demote_host."
# The former master has just been gracefully demoted.
unset_master
;;
["03_mysql-ra-monitor.patch" (text/plain)]
--- mysql-02 2010-06-29 18:17:59.214106092 +0200
+++ mysql 2010-06-29 18:18:26.238105571 +0200
@@ -618,8 +618,13 @@
fi
fi
- ocf_log info "MySQL monitor succeeded";
- return $OCF_SUCCESS
+ if [ "$OCF_RESKEY_CRM_meta_role" = "Master" ]; then
+ ocf_log info "MySQL monitor succeeded (master)";
+ return $OCF_RUNNING_MASTER
+ else
+ ocf_log info "MySQL monitor succeeded";
+ return $OCF_SUCCESS
+ fi
}
mysql_start() {
["04_mysql-ra-detailed-logging.patch" (text/plain)]
--- mysql-03 2010-06-29 18:19:06.698076674 +0200
+++ mysql 2010-06-29 18:21:29.398105235 +0200
@@ -511,7 +511,12 @@
# First, stop the slave I/O thread and wait for relay log
# processing to complete
ocf_run $MYSQL $mysql_options \
- -e "STOP SLAVE IO_THREAD" || exit $OCF_ERR_GENERIC
+ -e "STOP SLAVE IO_THREAD"
+ if [ $? -gt 0 ]; then
+ ocf_log err "Error stopping slave IO thread"
+ exit $OCF_ERR_GENERIC
+ fi
+
while true; do
$MYSQL $mysql_options \
-e 'SHOW PROCESSLIST\G' > $tmpfile
@@ -526,9 +531,18 @@
# Now, stop all slave activity and unset the master host
ocf_run $MYSQL $mysql_options \
- -e "STOP SLAVE" || exit $OCF_ERR_GENERIC
+ -e "STOP SLAVE"
+ if [ $? -gt 0 ]; then
+ ocf_log err "Error stopping rest slave threads"
+ exit $OCF_ERR_GENERIC
+ fi
+
ocf_run $MYSQL $mysql_options \
- -e "CHANGE MASTER TO MASTER_HOST=''" || exit $OCF_ERR_GENERIC
+ -e "CHANGE MASTER TO MASTER_HOST=''"
+ if [ $? -gt 0 ]; then
+ ocf_log err "Failed to set master"
+ exit $OCF_ERR_GENERIC
+ fi
}
#######################################################################
@@ -788,7 +802,15 @@
}
mysql_demote() {
- set_read_only on || return $OCF_ERR_GENERIC
+ if ( ! mysql_status ); then
+ return $OCF_NOT_RUNNING
+ fi
+
+ set_read_only on
+ if [ $? -ne 0 ]; then
+ ocf_log err "Failed to set read-only";
+ return $OCF_ERR_GENERIC;
+ fi
# Return master preference to default, so the cluster manager gets
# a chance to select a new master
["05_mysql-ra-doc-skip-slave.patch" (text/plain)]
--- mysql-04 2010-06-29 18:23:11.398106631 +0200
+++ mysql 2010-06-29 18:23:18.942105663 +0200
@@ -248,6 +248,7 @@
<longdesc lang="en">
Additional parameters which are passed to the mysqld on startup.
(e.g. --skip-external-locking or --skip-grant-tables)
+On M/S setup --skip-slave-start is needed (or in config file).
</longdesc>
<shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
<content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>
["06_mysql-ra-slave-start-replication.patch" (text/plain)]
--- mysql-05 2010-06-29 18:23:43.806105971 +0200
+++ mysql 2010-06-29 18:23:55.882104463 +0200
@@ -730,6 +730,19 @@
# don't know what master to replicate from), we simply start
# in read only mode.
set_read_only on
+
+ master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "`
+ if [ "$master_host" -a "$master_host" != `uname -n` ]; then
+ ocf_log info "Changing MySQL configuration to replicate from $master_host."
+ set_master $master_host
+ ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
+ -e "SLAVE START"
+ if [ $? -ne 0 ]; then
+ ocf_log err "Failed to start slave";
+ return $OCF_ERR_GENERIC;
+ fi
+ fi
+
# We also need to set a master preference, otherwise Pacemaker
# won't ever promote us in the absence of any explicit
# preference set by the administrator. We choose a low
["07_mysql-ra-check-slave-user.patch" (text/plain)]
--- mysql-06 2010-06-29 18:24:48.166105436 +0200
+++ mysql 2010-06-29 18:32:18.666101597 +0200
@@ -371,7 +371,7 @@
tmpfile=`mktemp ${HA_RSCTMP}/is_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
- mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user \
--password=$OCF_RESKEY_test_passwd" + mysql_options="$MYSQL_OPTIONS_LOCAL \
--user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
$MYSQL $mysql_options \
-e 'SHOW SLAVE STATUS\G' > $tmpfile
@@ -396,7 +396,7 @@
rc=1
tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
- mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user \
--password=$OCF_RESKEY_test_passwd" + mysql_options="$MYSQL_OPTIONS_LOCAL \
--user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
$MYSQL $mysql_options \
-e 'SHOW SLAVE STATUS\G' > $tmpfile
["08_mysql-ra-repl-keep-state.patch" (text/plain)]
--- mysql 2010-06-29 18:32:18.666101597 +0200
+++ mysql-07 2010-06-29 18:30:40.254105999 +0200
@@ -76,6 +76,7 @@
OCF_RESKEY_replication_port_default="3306"
OCF_RESKEY_max_slave_lag_default="3600"
OCF_RESKEY_evict_outdated_slaves_default="false"
+OCF_RESKEY_state_default=${HA_RSCTMP}/Mysql-repl-${OCF_RESOURCE_INSTANCE}.state
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
@@ -106,6 +107,8 @@
: ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
: ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
+: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}}
+
#######################################################################
usage() {
@@ -308,6 +311,14 @@
<content type="boolean" default="${OCF_RESKEY_evict_outdated_slaves_default}" />
</parameter>
+<parameter name="state" unique="1">
+<longdesc lang="en">
+Location to store the mysql replication state in.
+</longdesc>
+<shortdesc lang="en">State file</shortdesc>
+<content type="string" default="${OCF_RESKEY_state_default}" />
+</parameter>
+
</parameters>
<actions>
@@ -387,13 +398,11 @@
return 1
}
-check_slave() {
- # Checks slave status
- local rc
- local tmpfile
+get_slave_info() {
+ # Warning: this sets $tmpfile and LEAVE this file! You must delete it after use!
+
local mysql_options
- rc=1
tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user \
--password=$OCF_RESKEY_replication_passwd" @@ -401,23 +410,36 @@
$MYSQL $mysql_options \
-e 'SHOW SLAVE STATUS\G' > $tmpfile
- local master_host
- local master_user
- local master_port
- local slave_sql
- local slave_io
- local last_errno
- local secs_behind
-
if [ -s $tmpfile ]; then
- master_host=`sed -ne 's/^.*Master_Host: \(.*\)$/\1/p' < $tmpfile`
- master_user=`sed -ne 's/^.*Master_User: \(.*\)$/\1/p' < $tmpfile`
- master_port=`sed -ne 's/^.*Master_Port: \(.*\)$/\1/p' < $tmpfile`
- slave_sql=`sed -ne 's/^.*Slave_SQL_Running: \(.*\)$/\1/p' < $tmpfile`
- slave_io=`sed -ne 's/^.*Slave_IO_Running: \(.*\)$/\1/p' < $tmpfile`
- last_errno=`sed -ne 's/^.*Last_Errno: \(.*\)$/\1/p' < $tmpfile`
- secs_behind=`sed -ne 's/^.*Seconds_Behind_Master: \(.*\)$/\1/p' < $tmpfile`
+ master_host=`sed -ne 's/^.* Master_Host: \(.*\)$/\1/p' < $tmpfile`
+ master_user=`sed -ne 's/^.* Master_User: \(.*\)$/\1/p' < $tmpfile`
+ master_port=`sed -ne 's/^.* Master_Port: \(.*\)$/\1/p' < $tmpfile`
+ master_log_file=`sed -ne 's/^.* Master_Log_File: \(.*\)$/\1/p' < $tmpfile`
+ master_log_pos=`sed -ne 's/^.* Read_Master_Log_Pos: \(.*\)$/\1/p' < $tmpfile`
+ slave_sql=`sed -ne 's/^.* Slave_SQL_Running: \(.*\)$/\1/p' < $tmpfile`
+ slave_io=`sed -ne 's/^.* Slave_IO_Running: \(.*\)$/\1/p' < $tmpfile`
+ last_errno=`sed -ne 's/^.* Last_Errno: \(.*\)$/\1/p' < $tmpfile`
+ secs_behind=`sed -ne 's/^.* Seconds_Behind_Master: \(.*\)$/\1/p' < $tmpfile`
+
+ ocf_log debug "MySQL instance running as a replication slave"
+ else
+ # Instance produced an empty "SHOW SLAVE STATUS" output --
+ # instance is not a slave
+ ocf_log err "check_slave invoked on an instance that is not a replication slave."
+ return $OCF_ERR_GENERIC
+ fi
+ return $OCF_SUCCESS
+}
+
+check_slave() {
+ # Checks slave status
+ local rc
+
+ get_slave_info
+ rc=$?
+
+ if [ $rc -eq 0 ]; then
if [ $last_errno -ne 0 ]; then
# Whoa. Replication ran into an error. This slave has
# diverged from its master. Make sure this resource
@@ -476,18 +498,42 @@
}
set_master() {
+ local new_master_host
+ local master_params
+
+ new_master_host=$1
+
+ # Keep replication position
+ get_slave_info
+
+ if [ "$master_log_file" -a "$new_master_host" = "$master_host" ]; then
+ master_params=", MASTER_LOG_FILE='$master_log_file', \
+ MASTER_LOG_POS=$master_log_pos"
+ ocf_log debug "Kept master pos for $master_host : \
$master_log_file:$master_log_pos" + elif [ -r "$OCF_RESKEY_state" ]; then
+ master_host=
+ . $OCF_RESKEY_state
+ if [ "$new_master_host" = "$master_host" ]; then
+ master_params=", MASTER_LOG_FILE='$master_log_file', \
+ MASTER_LOG_POS=$master_log_pos"
+ ocf_log debug "Restored master pos for $master_host : \
$master_log_file:$master_log_pos" + fi
+ fi
+
# Informs the MySQL server of the master to replicate
# from. Accepts one mandatory argument which must contain the host
# name of the new master host. The master must either be unchanged
# from the laste master the slave replicated from, or freshly
# reset with RESET MASTER.
- local master_host
- master_host=$1
ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
- -e "CHANGE MASTER TO MASTER_HOST='$master_host', \
+ -e "CHANGE MASTER TO MASTER_HOST='$new_master_host', \
MASTER_USER='$OCF_RESKEY_replication_user', \
- MASTER_PASSWORD='$OCF_RESKEY_replication_passwd'"
+ MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' \
$master_params" +
+ # Remove state file - it will be invalid after SLAVE START
+ rm -f $OCF_RESKEY_state
+ rm -f $tmpfile
}
unset_master(){
@@ -537,7 +583,16 @@
ocf_log err "Error stopping rest slave threads"
exit $OCF_ERR_GENERIC
fi
-
+
+ #Save current state
+ get_slave_info
+ cat <<END > $OCF_RESKEY_state
+master_host="$master_host"
+master_log_file="$master_log_file"
+master_log_pos="$master_log_pos"
+END
+ rm -f $tmpfile
+
ocf_run $MYSQL $mysql_options \
-e "CHANGE MASTER TO MASTER_HOST=''"
if [ $? -gt 0 ]; then
@@ -805,6 +860,8 @@
if ( ! mysql_status ); then
return $OCF_NOT_RUNNING
fi
+ ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
+ -e "SLAVE STOP"
set_read_only off || return $OCF_ERR_GENERIC
# Existing master gets a higher-than-default master preference, so
@@ -863,9 +920,7 @@
fi
if [ $master_host = `uname -n` ]; then
- ocf_log info "Resetting MySQL replication configuration on new master \
$master_host"
- ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
- -e 'RESET MASTER'
+ ocf_log info "This will be new master"
else
ocf_log info "Changing MySQL configuration to replicate from $master_host"
set_master $master_host
["smime.p7s" (application/x-pkcs7-signature)]
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic