[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-ha-dev
Subject:    Re: [Linux-ha-dev] Patch for mysql RA
From:       Marek Marczykowski <marmarek () staszic ! waw ! pl>
Date:       2010-06-29 16:53:23
Message-ID: 20100629165323.GT27752 () boss ! staszic ! waw ! pl
[Download RAW message or body]

[Attachment #2 (multipart/signed)]

[Attachment #4 (multipart/mixed)]


On Tue, Jun 29, 2010 at 05:41:08PM +0200, Dejan Muhamedagic wrote:
> Hi,

Hi,

> On Tue, Jun 29, 2010 at 12:45:47AM +0200, Marek Marczykowski wrote:
> > I'm implementing some HA solution using pacemaker and I've made some
> > changes to mysql RA. Maybe you get interested in some of them. Patch
> > attached. List of changes:
> >  * [bugfix] monitor return $OCF_RUNNING_MASTER on master
> >  * [bugfix] slave info collected with replication user
> >  * [bugfix] cut ending space from OCF_* host lists
> >  * [doc] suggest --skip-slave-start option
> >  * [feature] detailed logging on errors
> >  * [feature] setup replication on late slave start
> >  * [feature] another concept of M/S replication - try to keep state
> 
> Could we please have this split into as many patches as there are
> unrelated changes (looks like there should be 7). Otherwise it's
> going to be difficult to see what's affected by which part of the
> patch.

OK, I've split it into 8 patches (a typo fix was missing from the list
above). The patches are attached and also uploaded here:
http://marmarek.w.staszic.waw.pl/patches/ha-mysql-ra

-- 
Best Regards,
Marek Marczykowski          |   gg:2873965      | RLU #390519
marmarek at staszic waw pl  | xmpp:marmarek at staszic waw pl


["01_mysql-ra-wrong-assigment.patch" (text/plain)]

--- mysql.orig	2010-06-29 17:45:14.390077677 +0200
+++ mysql	2010-06-29 18:07:44.399141260 +0200
@@ -458,7 +458,7 @@
 	    master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
 	    if [ $master_pref -lt 0 ]; then
 		# Sanitize a below-zero preference to just zero
-		$master_pref=0
+		master_pref=0
 	    fi
 	    $CRM_MASTER -v $master_pref
 	fi

["02_mysql-ra-trailing-spaces.patch" (text/plain)]

--- mysql-01	2010-06-29 18:13:35.738106899 +0200
+++ mysql	2010-06-29 18:16:52.899101522 +0200
@@ -811,7 +811,7 @@
 	    # connect to it and wait for it to start replicating.
 	    local master_host
 	    local master_status
-	    master_host=$OCF_RESKEY_CRM_meta_notify_promote_uname
+	    master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "`
 
 	    if ( ! mysql_status ); then
 		return $OCF_NOT_RUNNING
@@ -834,7 +834,8 @@
 	    # The master has completed its promotion. Now is a good
 	    # time to check whether our replication slave is working
 	    # correctly.
-	    if [ $OCF_RESKEY_CRM_meta_notify_promote_uname = `uname -n` ]; then
+	    master_host=`echo $OCF_RESKEY_CRM_meta_notify_promote_uname|tr -d " "`
+	    if [ "$master_host" = `uname -n` ]; then
 		ocf_log info "Ignoring post-promote notification for my own promotion."
 		return $OCF_SUCCESS
 	    fi
@@ -842,10 +843,12 @@
 		-e 'START SLAVE';
 	    ;;
 	'post-demote')
-	    if [ $OCF_RESKEY_CRM_meta_notify_demote_uname = `uname -n` ]; then
+	    demote_host=`echo $OCF_RESKEY_CRM_meta_notify_demote_uname|tr -d " "`
+	    if [ $demote_host = `uname -n` ]; then
 		ocf_log info "Ignoring post-demote notification for my own demotion."
 		return $OCF_SUCCESS
 	    fi
+	    ocf_log info "post-demote notification for $demote_host."
 	    # The former master has just been gracefully demoted.
 	    unset_master
 	    ;;

["03_mysql-ra-monitor.patch" (text/plain)]

--- mysql-02	2010-06-29 18:17:59.214106092 +0200
+++ mysql	2010-06-29 18:18:26.238105571 +0200
@@ -618,8 +618,13 @@
 	fi
     fi
 
-    ocf_log info "MySQL monitor succeeded";
-    return $OCF_SUCCESS
+    if [ "$OCF_RESKEY_CRM_meta_role" = "Master" ]; then
+	    ocf_log info "MySQL monitor succeeded (master)";
+	    return $OCF_RUNNING_MASTER
+    else
+	    ocf_log info "MySQL monitor succeeded";
+	    return $OCF_SUCCESS
+    fi
 }
 
 mysql_start() {

["04_mysql-ra-detailed-logging.patch" (text/plain)]

--- mysql-03	2010-06-29 18:19:06.698076674 +0200
+++ mysql	2010-06-29 18:21:29.398105235 +0200
@@ -511,7 +511,12 @@
     # First, stop the slave I/O thread and wait for relay log
     # processing to complete
     ocf_run $MYSQL $mysql_options \
-	-e "STOP SLAVE IO_THREAD" || exit $OCF_ERR_GENERIC
+	-e "STOP SLAVE IO_THREAD"
+    if [ $? -gt 0 ]; then
+	ocf_log err "Error stopping slave IO thread"
+	exit $OCF_ERR_GENERIC
+    fi
+
     while true; do
      	$MYSQL $mysql_options \
      	    -e 'SHOW PROCESSLIST\G' > $tmpfile
@@ -526,9 +531,18 @@
 
     # Now, stop all slave activity and unset the master host
     ocf_run $MYSQL $mysql_options \
-	-e "STOP SLAVE" || exit $OCF_ERR_GENERIC
+	-e "STOP SLAVE"
+    if [ $? -gt 0 ]; then
+	ocf_log err "Error stopping rest slave threads"
+	exit $OCF_ERR_GENERIC
+    fi
+    
     ocf_run $MYSQL $mysql_options \
-	-e "CHANGE MASTER TO MASTER_HOST=''" || exit $OCF_ERR_GENERIC
+	-e "CHANGE MASTER TO MASTER_HOST=''" 
+    if [ $? -gt 0 ]; then
+	    ocf_log err "Failed to set master"
+	    exit $OCF_ERR_GENERIC
+    fi
 }
 
 #######################################################################
@@ -788,7 +802,15 @@
 }
 
 mysql_demote() {
-    set_read_only on || return $OCF_ERR_GENERIC
+    if ( ! mysql_status ); then
+	return $OCF_NOT_RUNNING
+    fi
+
+    set_read_only on
+    if [ $? -ne 0 ]; then
+	ocf_log err "Failed to set read-only";
+	return $OCF_ERR_GENERIC;
+    fi
 
     # Return master preference to default, so the cluster manager gets
     # a chance to select a new master

["05_mysql-ra-doc-skip-slave.patch" (text/plain)]

--- mysql-04	2010-06-29 18:23:11.398106631 +0200
+++ mysql	2010-06-29 18:23:18.942105663 +0200
@@ -248,6 +248,7 @@
 <longdesc lang="en">
 Additional parameters which are passed to the mysqld on startup.
 (e.g. --skip-external-locking or --skip-grant-tables)
+On M/S setup --skip-slave-start is needed (or in config file).
 </longdesc>
 <shortdesc lang="en">Additional parameters to pass to mysqld</shortdesc>
 <content type="string" default="${OCF_RESKEY_additional_parameters_default}"/>

["06_mysql-ra-slave-start-replication.patch" (text/plain)]

--- mysql-05	2010-06-29 18:23:43.806105971 +0200
+++ mysql	2010-06-29 18:23:55.882104463 +0200
@@ -730,6 +730,19 @@
 	# don't know what master to replicate from), we simply start
 	# in read only mode.
 	set_read_only on
+
+	master_host=`echo $OCF_RESKEY_CRM_meta_notify_master_uname|tr -d " "`
+	if [ "$master_host" -a "$master_host" != `uname -n` ]; then
+	    ocf_log info "Changing MySQL configuration to replicate from $master_host."
+	    set_master $master_host
+	    ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
+		-e "SLAVE START"
+	    if [ $? -ne 0 ]; then
+		ocf_log err "Failed to start slave";
+		return $OCF_ERR_GENERIC;
+	    fi
+	fi
+
 	# We also need to set a master preference, otherwise Pacemaker
 	# won't ever promote us in the absence of any explicit
 	# preference set by the administrator. We choose a low

["07_mysql-ra-check-slave-user.patch" (text/plain)]

--- mysql-06	2010-06-29 18:24:48.166105436 +0200
+++ mysql	2010-06-29 18:32:18.666101597 +0200
@@ -371,7 +371,7 @@
 
     tmpfile=`mktemp ${HA_RSCTMP}/is_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
 
-    mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
+    mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
 
     $MYSQL $mysql_options \
         -e 'SHOW SLAVE STATUS\G' > $tmpfile
@@ -396,7 +396,7 @@
     rc=1
     tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
 
-    mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd"
+    mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
 
     $MYSQL $mysql_options \
         -e 'SHOW SLAVE STATUS\G' > $tmpfile


["08_mysql-ra-repl-keep-state.patch" (text/plain)]

--- mysql	2010-06-29 18:32:18.666101597 +0200
+++ mysql-07	2010-06-29 18:30:40.254105999 +0200
@@ -76,6 +76,7 @@
 OCF_RESKEY_replication_port_default="3306"
 OCF_RESKEY_max_slave_lag_default="3600"
 OCF_RESKEY_evict_outdated_slaves_default="false"
+OCF_RESKEY_state_default=${HA_RSCTMP}/Mysql-repl-${OCF_RESOURCE_INSTANCE}.state
 
 : ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
 MYSQL_BINDIR=`dirname ${OCF_RESKEY_binary}`
@@ -106,6 +107,8 @@
 : ${OCF_RESKEY_max_slave_lag=${OCF_RESKEY_max_slave_lag_default}}
 : ${OCF_RESKEY_evict_outdated_slaves=${OCF_RESKEY_evict_outdated_slaves_default}}
 
+: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}}
+
 #######################################################################
 
 usage() {
@@ -308,6 +311,14 @@
 <content type="boolean" default="${OCF_RESKEY_evict_outdated_slaves_default}" />
 </parameter>
 
+<parameter name="state" unique="1">
+<longdesc lang="en">
+Location to store the mysql replication state in.
+</longdesc>
+<shortdesc lang="en">State file</shortdesc>
+<content type="string" default="${OCF_RESKEY_state_default}" />
+</parameter>
+
 </parameters>
 
 <actions>
@@ -387,13 +398,11 @@
     return 1
 }
 
-check_slave() {
-    # Checks slave status
-    local rc
-    local tmpfile
+get_slave_info() {
+    # Warning: this sets $tmpfile and LEAVE this file! You must delete it after use!
+
     local mysql_options
 
-    rc=1
     tmpfile=`mktemp ${HA_RSCTMP}/check_slave.${OCF_RESOURCE_INSTANCE}.XXXXXX`
 
     mysql_options="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_replication_user --password=$OCF_RESKEY_replication_passwd"
@@ -401,23 +410,36 @@
     $MYSQL $mysql_options \
         -e 'SHOW SLAVE STATUS\G' > $tmpfile
 
-    local master_host
-    local master_user
-    local master_port
-    local slave_sql
-    local slave_io
-    local last_errno
-    local secs_behind
-
     if [ -s $tmpfile ]; then
-	master_host=`sed -ne 's/^.*Master_Host: \(.*\)$/\1/p' < $tmpfile`
-	master_user=`sed -ne 's/^.*Master_User: \(.*\)$/\1/p' < $tmpfile`
-	master_port=`sed -ne 's/^.*Master_Port: \(.*\)$/\1/p' < $tmpfile`
-	slave_sql=`sed -ne 's/^.*Slave_SQL_Running: \(.*\)$/\1/p' < $tmpfile`
-	slave_io=`sed -ne 's/^.*Slave_IO_Running: \(.*\)$/\1/p' < $tmpfile`
-	last_errno=`sed -ne 's/^.*Last_Errno: \(.*\)$/\1/p' < $tmpfile`
-	secs_behind=`sed -ne 's/^.*Seconds_Behind_Master: \(.*\)$/\1/p' < $tmpfile`
+	master_host=`sed -ne 's/^.* Master_Host: \(.*\)$/\1/p' < $tmpfile`
+	master_user=`sed -ne 's/^.* Master_User: \(.*\)$/\1/p' < $tmpfile`
+	master_port=`sed -ne 's/^.* Master_Port: \(.*\)$/\1/p' < $tmpfile`
+	master_log_file=`sed -ne 's/^.* Master_Log_File: \(.*\)$/\1/p' < $tmpfile`
+	master_log_pos=`sed -ne 's/^.* Read_Master_Log_Pos: \(.*\)$/\1/p' < $tmpfile`
+	slave_sql=`sed -ne 's/^.* Slave_SQL_Running: \(.*\)$/\1/p' < $tmpfile`
+	slave_io=`sed -ne 's/^.* Slave_IO_Running: \(.*\)$/\1/p' < $tmpfile`
+	last_errno=`sed -ne 's/^.* Last_Errno: \(.*\)$/\1/p' < $tmpfile`
+	secs_behind=`sed -ne 's/^.* Seconds_Behind_Master: \(.*\)$/\1/p' < $tmpfile`
+
+        ocf_log debug "MySQL instance running as a replication slave"
+    else
+        # Instance produced an empty "SHOW SLAVE STATUS" output --
+        # instance is not a slave
+	ocf_log err "check_slave invoked on an instance that is not a replication slave."
+	return $OCF_ERR_GENERIC
+    fi
 
+    return $OCF_SUCCESS
+}
+
+check_slave() {
+    # Checks slave status
+    local rc
+
+    get_slave_info
+    rc=$?
+
+    if [ $rc -eq 0 ]; then
 	if [ $last_errno -ne 0 ]; then
 	    # Whoa. Replication ran into an error. This slave has
 	    # diverged from its master. Make sure this resource
@@ -476,18 +498,42 @@
 }
 
 set_master() {
+    local new_master_host
+    local master_params
+
+    new_master_host=$1
+
+    # Keep replication position
+    get_slave_info
+
+    if [ "$master_log_file" -a "$new_master_host" = "$master_host" ]; then
+        master_params=", MASTER_LOG_FILE='$master_log_file', \
+                         MASTER_LOG_POS=$master_log_pos"
+        ocf_log debug "Kept master pos for $master_host : $master_log_file:$master_log_pos"
+    elif [ -r "$OCF_RESKEY_state" ]; then
+        master_host=
+        . $OCF_RESKEY_state
+        if [ "$new_master_host" = "$master_host" ]; then
+                master_params=", MASTER_LOG_FILE='$master_log_file', \
+                                 MASTER_LOG_POS=$master_log_pos"
+                 ocf_log debug "Restored master pos for $master_host : $master_log_file:$master_log_pos"
+        fi
+     fi
+
     # Informs the MySQL server of the master to replicate
     # from. Accepts one mandatory argument which must contain the host
     # name of the new master host. The master must either be unchanged
     # from the laste master the slave replicated from, or freshly
     # reset with RESET MASTER.
-    local master_host
-    master_host=$1
 
     ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
-	-e "CHANGE MASTER TO MASTER_HOST='$master_host', \
+	-e "CHANGE MASTER TO MASTER_HOST='$new_master_host', \
                              MASTER_USER='$OCF_RESKEY_replication_user', \
-                             MASTER_PASSWORD='$OCF_RESKEY_replication_passwd'"
+                             MASTER_PASSWORD='$OCF_RESKEY_replication_passwd' $master_params"
+
+    # Remove state file - it will be invalid after SLAVE START
+    rm -f $OCF_RESKEY_state
+    rm -f $tmpfile
 }
 
 unset_master(){
@@ -537,7 +583,16 @@
 	ocf_log err "Error stopping rest slave threads"
 	exit $OCF_ERR_GENERIC
     fi
-    
+
+	#Save current state
+	get_slave_info
+	cat <<END > $OCF_RESKEY_state
+master_host="$master_host"
+master_log_file="$master_log_file"
+master_log_pos="$master_log_pos"
+END
+	rm -f $tmpfile
+
     ocf_run $MYSQL $mysql_options \
 	-e "CHANGE MASTER TO MASTER_HOST=''" 
     if [ $? -gt 0 ]; then
@@ -805,6 +860,8 @@
     if ( ! mysql_status ); then
 	return $OCF_NOT_RUNNING
     fi
+    ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
+	-e "SLAVE STOP"
     set_read_only off || return $OCF_ERR_GENERIC
 
     # Existing master gets a higher-than-default master preference, so
@@ -863,9 +920,7 @@
 	    fi
 
 	    if [ $master_host = `uname -n` ]; then
-		ocf_log info "Resetting MySQL replication configuration on new master $master_host"
-		ocf_run $MYSQL $MYSQL_OPTIONS_LOCAL $MYSQL_OPTIONS_REPL \
-		    -e 'RESET MASTER'
+		ocf_log info "This will be new master"
 	    else
 		ocf_log info "Changing MySQL configuration to replicate from $master_host"
 		set_master $master_host


["smime.p7s" (application/x-pkcs7-signature)]

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic