[prev in list] [next in list] [prev in thread] [next in thread] 

List:       quagga-dev
Subject:    [quagga-dev 10976] [PATCH] quagga: nexthop-tracking.patch
From:       Pradosh Mohapatra <pmohapat () cumulusnetworks ! com>
Date:       2014-01-12 19:05:17
Message-ID: 20140112190517.23838.70264.stgit () monster-04 ! cumulusnetworks ! com
[Download RAW message or body]

Add next hop tracking support to Quagga. Complete documentation in doc/next-hop-tracking.txt.

Signed-off-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com>
Signed-off-by: Daniel Walton <dwalton@cumulusnetworks.com>
---
 bgpd/Makefile.am          |    5 
 bgpd/bgp_debug.c          |   57 ++++
 bgpd/bgp_debug.h          |    3 
 bgpd/bgp_nexthop.c        |  221 +++++++++++++---
 bgpd/bgp_nexthop.h        |   20 +
 bgpd/bgp_nht.c            |  467 +++++++++++++++++++++++++++++++++++
 bgpd/bgp_nht.h            |   62 +++++
 bgpd/bgp_route.c          |  103 +++-----
 bgpd/bgp_route.h          |   14 +
 bgpd/bgp_zebra.c          |   11 +
 doc/next-hop-tracking.txt |  326 ++++++++++++++++++++++++
 lib/Makefile.am           |    4 
 lib/log.c                 |    3 
 lib/memtypes.c            |    1 
 lib/nexthop.c             |  109 ++++++++
 lib/nexthop.h             |   90 +++++++
 lib/zclient.c             |    6 
 lib/zclient.h             |    1 
 lib/zebra.h               |    6 
 zebra/Makefile.am         |    7 -
 zebra/debug.c             |   30 ++
 zebra/debug.h             |    3 
 zebra/rib.h               |   56 ----
 zebra/rt_netlink.c        |    1 
 zebra/zebra_fpm_netlink.c |    1 
 zebra/zebra_rib.c         |   45 +--
 zebra/zebra_rnh.c         |  606 +++++++++++++++++++++++++++++++++++++++++++++
 zebra/zebra_rnh.h         |   48 ++++
 zebra/zebra_rnh_null.c    |   10 +
 zebra/zebra_routemap.c    |    1 
 zebra/zebra_vty.c         |   28 ++
 zebra/zserv.c             |   81 ++++++
 zebra/zserv.h             |    6 
 33 files changed, 2218 insertions(+), 214 deletions(-)
 create mode 100644 bgpd/bgp_nht.c
 create mode 100644 bgpd/bgp_nht.h
 create mode 100644 doc/next-hop-tracking.txt
 create mode 100644 lib/nexthop.c
 create mode 100644 lib/nexthop.h
 create mode 100644 zebra/zebra_rnh.c
 create mode 100644 zebra/zebra_rnh.h
 create mode 100644 zebra/zebra_rnh_null.c

diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am
index 9928734..63836a1 100644
--- a/bgpd/Makefile.am
+++ b/bgpd/Makefile.am
@@ -15,14 +15,15 @@ libbgp_a_SOURCES = \
 	bgp_debug.c bgp_route.c bgp_zebra.c bgp_open.c bgp_routemap.c \
 	bgp_packet.c bgp_network.c bgp_filter.c bgp_regex.c bgp_clist.c \
 	bgp_dump.c bgp_snmp.c bgp_ecommunity.c bgp_mplsvpn.c bgp_nexthop.c \
-	bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c
+	bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c \
+        bgp_nht.c
 
 noinst_HEADERS = \
 	bgp_aspath.h bgp_attr.h bgp_community.h bgp_debug.h bgp_fsm.h \
 	bgp_network.h bgp_open.h bgp_packet.h bgp_regex.h bgp_route.h \
 	bgpd.h bgp_filter.h bgp_clist.h bgp_dump.h bgp_zebra.h \
 	bgp_ecommunity.h bgp_mplsvpn.h bgp_nexthop.h bgp_damp.h bgp_table.h \
-	bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h
+	bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h bgp_nht.h
 
 bgpd_SOURCES = bgp_main.c
 bgpd_LDADD = libbgp.a ../lib/libzebra.la @LIBCAP@ @LIBM@
diff --git a/bgpd/bgp_debug.c b/bgpd/bgp_debug.c
index 726dd86..85e7c47 100644
--- a/bgpd/bgp_debug.c
+++ b/bgpd/bgp_debug.c
@@ -45,6 +45,7 @@ unsigned long conf_bgp_debug_keepalive;
 unsigned long conf_bgp_debug_update;
 unsigned long conf_bgp_debug_normal;
 unsigned long conf_bgp_debug_zebra;
+unsigned long conf_bgp_debug_nht;
 
 unsigned long term_bgp_debug_as4;
 unsigned long term_bgp_debug_fsm;
@@ -55,6 +56,7 @@ unsigned long term_bgp_debug_keepalive;
 unsigned long term_bgp_debug_update;
 unsigned long term_bgp_debug_normal;
 unsigned long term_bgp_debug_zebra;
+unsigned long term_bgp_debug_nht;
 
 /* messages for BGP-4 status */
 const struct message bgp_status_msg[] = 
@@ -471,6 +473,48 @@ ALIAS (no_debug_bgp_events,
        BGP_STR
        "BGP events\n")
 
+DEFUN (debug_bgp_nht,
+       debug_bgp_nht_cmd,
+       "debug bgp nht",
+       DEBUG_STR
+       BGP_STR
+       "BGP nexthop tracking events\n")
+{
+  if (vty->node == CONFIG_NODE)
+    DEBUG_ON (nht, NHT);
+  else
+    {
+      TERM_DEBUG_ON (nht, NHT);
+      vty_out (vty, "BGP nexthop tracking debugging is on%s", VTY_NEWLINE);
+    }
+  return CMD_SUCCESS;
+}
+
+DEFUN (no_debug_bgp_nht,
+       no_debug_bgp_nht_cmd,
+       "no debug bgp nht",
+       NO_STR
+       DEBUG_STR
+       BGP_STR
+       "BGP nexthop tracking events\n")
+{
+  if (vty->node == CONFIG_NODE)
+    DEBUG_OFF (nht, NHT);
+  else
+    {
+      TERM_DEBUG_OFF (nht, NHT);
+      vty_out (vty, "BGP nexthop tracking debugging is off%s", VTY_NEWLINE);
+    }
+  return CMD_SUCCESS;
+}
+
+ALIAS (no_debug_bgp_nht,
+       undebug_bgp_nht_cmd,
+       "undebug bgp nht",
+       UNDEBUG_STR
+       BGP_STR
+       "BGP next-hop tracking updates\n")
+
 DEFUN (debug_bgp_filter,
        debug_bgp_filter_cmd,
        "debug bgp filters",
@@ -787,6 +831,8 @@ DEFUN (show_debugging_bgp,
     vty_out (vty, "  BGP as4 debugging is on%s", VTY_NEWLINE);
   if (BGP_DEBUG (as4, AS4_SEGMENT))
     vty_out (vty, "  BGP as4 aspath segment debugging is on%s", VTY_NEWLINE);
+  if (BGP_DEBUG (nht, NHT))
+    vty_out (vty, "  BGP next-hop tracking debugging is on%s", VTY_NEWLINE);
   vty_out (vty, "%s", VTY_NEWLINE);
   return CMD_SUCCESS;
 }
@@ -860,6 +906,12 @@ bgp_config_write_debug (struct vty *vty)
       write++;
     }
 
+    if (CONF_BGP_DEBUG (nht, NHT))
+    {
+      vty_out (vty, "debug bgp nht%s", VTY_NEWLINE);
+      write++;
+    }
+
   return write;
 }
 
@@ -886,6 +938,8 @@ bgp_debug_init (void)
   install_element (CONFIG_NODE, &debug_bgp_fsm_cmd);
   install_element (ENABLE_NODE, &debug_bgp_events_cmd);
   install_element (CONFIG_NODE, &debug_bgp_events_cmd);
+  install_element (ENABLE_NODE, &debug_bgp_nht_cmd);
+  install_element (CONFIG_NODE, &debug_bgp_nht_cmd);
   install_element (ENABLE_NODE, &debug_bgp_filter_cmd);
   install_element (CONFIG_NODE, &debug_bgp_filter_cmd);
   install_element (ENABLE_NODE, &debug_bgp_keepalive_cmd);
@@ -912,6 +966,9 @@ bgp_debug_init (void)
   install_element (ENABLE_NODE, &no_debug_bgp_events_cmd);
   install_element (ENABLE_NODE, &undebug_bgp_events_cmd);
   install_element (CONFIG_NODE, &no_debug_bgp_events_cmd);
+  install_element (ENABLE_NODE, &no_debug_bgp_nht_cmd);
+  install_element (ENABLE_NODE, &undebug_bgp_nht_cmd);
+  install_element (CONFIG_NODE, &no_debug_bgp_nht_cmd);
   install_element (ENABLE_NODE, &no_debug_bgp_filter_cmd);
   install_element (ENABLE_NODE, &undebug_bgp_filter_cmd);
   install_element (CONFIG_NODE, &no_debug_bgp_filter_cmd);
diff --git a/bgpd/bgp_debug.h b/bgpd/bgp_debug.h
index ce8547b..0090f81 100644
--- a/bgpd/bgp_debug.h
+++ b/bgpd/bgp_debug.h
@@ -67,6 +67,7 @@ extern unsigned long conf_bgp_debug_keepalive;
 extern unsigned long conf_bgp_debug_update;
 extern unsigned long conf_bgp_debug_normal;
 extern unsigned long conf_bgp_debug_zebra;
+extern unsigned long conf_bgp_debug_nht;
 
 extern unsigned long term_bgp_debug_as4;
 extern unsigned long term_bgp_debug_fsm;
@@ -77,6 +78,7 @@ extern unsigned long term_bgp_debug_keepalive;
 extern unsigned long term_bgp_debug_update;
 extern unsigned long term_bgp_debug_normal;
 extern unsigned long term_bgp_debug_zebra;
+extern unsigned long term_bgp_debug_nht;
 
 #define BGP_DEBUG_AS4                 0x01
 #define BGP_DEBUG_AS4_SEGMENT         0x02
@@ -90,6 +92,7 @@ extern unsigned long term_bgp_debug_zebra;
 #define BGP_DEBUG_UPDATE_OUT          0x02
 #define BGP_DEBUG_NORMAL              0x01
 #define BGP_DEBUG_ZEBRA               0x01
+#define BGP_DEBUG_NHT                 0x01
 
 #define BGP_DEBUG_PACKET_SEND         0x01
 #define BGP_DEBUG_PACKET_SEND_DETAIL  0x02
diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c
index 75b1e9e..e800a87 100644
--- a/bgpd/bgp_nexthop.c
+++ b/bgpd/bgp_nexthop.c
@@ -30,17 +30,21 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #include "memory.h"
 #include "hash.h"
 #include "jhash.h"
+#include "nexthop.h"
 
 #include "bgpd/bgpd.h"
 #include "bgpd/bgp_table.h"
 #include "bgpd/bgp_route.h"
 #include "bgpd/bgp_attr.h"
 #include "bgpd/bgp_nexthop.h"
+#include "bgpd/bgp_nht.h"
 #include "bgpd/bgp_debug.h"
 #include "bgpd/bgp_damp.h"
 #include "zebra/rib.h"
 #include "zebra/zserv.h"	/* For ZEBRA_SERV_PATH. */
 
+extern struct zclient *zclient;
+
 struct bgp_nexthop_cache *zlookup_query (struct in_addr);
 #ifdef HAVE_IPV6
 struct bgp_nexthop_cache *zlookup_query_ipv6 (struct in6_addr *);
@@ -59,7 +63,7 @@ static int bgp_scan_interval;
 static int bgp_import_interval;
 
 /* Route table for next-hop lookup cache. */
-static struct bgp_table *bgp_nexthop_cache_table[AFI_MAX];
+struct bgp_table *bgp_nexthop_cache_table[AFI_MAX];
 static struct bgp_table *cache1_table[AFI_MAX];
 static struct bgp_table *cache2_table[AFI_MAX];
 
@@ -69,6 +73,13 @@ static struct bgp_table *bgp_connected_table[AFI_MAX];
 /* BGP nexthop lookup query client. */
 struct zclient *zlookup = NULL;
 
+char *
+bnc_str (struct bgp_nexthop_cache *bnc, char *buf, int size)
+{
+  prefix2str(&(bnc->node->p), buf, size);
+  return buf;
+}
+
 /* Add nexthop to the end of the list.  */
 static void
 bnc_nexthop_add (struct bgp_nexthop_cache *bnc, struct nexthop *nexthop)
@@ -84,7 +95,7 @@ bnc_nexthop_add (struct bgp_nexthop_cache *bnc, struct nexthop *nexthop)
   nexthop->prev = last;
 }
 
-static void
+void
 bnc_nexthop_free (struct bgp_nexthop_cache *bnc)
 {
   struct nexthop *nexthop;
@@ -97,13 +108,17 @@ bnc_nexthop_free (struct bgp_nexthop_cache *bnc)
     }
 }
 
-static struct bgp_nexthop_cache *
-bnc_new (void)
+struct bgp_nexthop_cache *
+bnc_new ()
 {
-  return XCALLOC (MTYPE_BGP_NEXTHOP_CACHE, sizeof (struct bgp_nexthop_cache));
+  struct bgp_nexthop_cache *bnc;
+
+  bnc = XCALLOC (MTYPE_BGP_NEXTHOP_CACHE, sizeof (struct bgp_nexthop_cache));
+  LIST_INIT(&(bnc->paths));
+  return bnc;
 }
 
-static void
+void
 bnc_free (struct bgp_nexthop_cache *bnc)
 {
   bnc_nexthop_free (bnc);
@@ -111,48 +126,6 @@ bnc_free (struct bgp_nexthop_cache *bnc)
 }
 
 static int
-bgp_nexthop_same (struct nexthop *next1, struct nexthop *next2)
-{
-  if (next1->type != next2->type)
-    return 0;
-
-  switch (next1->type)
-    {
-    case ZEBRA_NEXTHOP_IPV4:
-      if (! IPV4_ADDR_SAME (&next1->gate.ipv4, &next2->gate.ipv4))
-	return 0;
-      break;
-    case ZEBRA_NEXTHOP_IPV4_IFINDEX:
-      if (! IPV4_ADDR_SAME (&next1->gate.ipv4, &next2->gate.ipv4)
-	  || next1->ifindex != next2->ifindex)
-	return 0;
-      break;
-    case ZEBRA_NEXTHOP_IFINDEX:
-    case ZEBRA_NEXTHOP_IFNAME:
-      if (next1->ifindex != next2->ifindex)
-	return 0;
-      break;
-#ifdef HAVE_IPV6
-    case ZEBRA_NEXTHOP_IPV6:
-      if (! IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6))
-	return 0;
-      break;
-    case ZEBRA_NEXTHOP_IPV6_IFINDEX:
-    case ZEBRA_NEXTHOP_IPV6_IFNAME:
-      if (! IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6))
-	return 0;
-      if (next1->ifindex != next2->ifindex)
-	return 0;
-      break;
-#endif /* HAVE_IPV6 */
-    default:
-      /* do nothing */
-      break;
-    }
-  return 1;
-}
-
-static int
 bgp_nexthop_cache_different (struct bgp_nexthop_cache *bnc1,
 			   struct bgp_nexthop_cache *bnc2)
 {
@@ -167,7 +140,7 @@ bgp_nexthop_cache_different (struct bgp_nexthop_cache *bnc1,
 
   for (i = 0; i < bnc1->nexthop_num; i++)
     {
-      if (! bgp_nexthop_same (next1, next2))
+      if (! nexthop_same (next1, next2))
 	return 1;
 
       next1 = next1->next;
@@ -416,6 +389,7 @@ bgp_scan (afi_t afi, safi_t safi)
   struct bgp_info *next;
   struct peer *peer;
   struct listnode *node, *nnode;
+#if BGP_SCAN_NEXTHOP
   int valid;
   int current;
   int changed;
@@ -426,6 +400,7 @@ bgp_scan (afi_t afi, safi_t safi)
     bgp_nexthop_cache_table[afi] = cache2_table[afi];
   else
     bgp_nexthop_cache_table[afi] = cache1_table[afi];
+#endif
 
   /* Get default bgp. */
   bgp = bgp_get_default ();
@@ -455,6 +430,7 @@ bgp_scan (afi_t afi, safi_t safi)
 
 	  if (bi->type == ZEBRA_ROUTE_BGP && bi->sub_type == BGP_ROUTE_NORMAL)
 	    {
+#if BGP_SCAN_NEXTHOP
 	      changed = 0;
 	      metricchanged = 0;
 
@@ -486,6 +462,7 @@ bgp_scan (afi_t afi, safi_t safi)
 					       afi, SAFI_UNICAST);
 		    }
 		}
+#endif
 
               if (CHECK_FLAG (bgp->af_flags[afi][SAFI_UNICAST],
 		  BGP_CONFIG_DAMPENING)
@@ -498,11 +475,13 @@ bgp_scan (afi_t afi, safi_t safi)
       bgp_process (bgp, rn, afi, SAFI_UNICAST);
     }
 
+#if BGP_SCAN_NEXTHOP
   /* Flash old cache. */
   if (bgp_nexthop_cache_table[afi] == cache1_table[afi])
     bgp_nexthop_cache_reset (cache2_table[afi]);
   else
     bgp_nexthop_cache_reset (cache1_table[afi]);
+#endif
 
   if (BGP_DEBUG (events, EVENTS))
     {
@@ -1289,9 +1268,7 @@ static int
 show_ip_bgp_scan_tables (struct vty *vty, const char detail)
 {
   struct bgp_node *rn;
-  struct bgp_nexthop_cache *bnc;
   char buf[INET6_ADDRSTRLEN];
-  u_char i;
 
   if (bgp_scan_thread)
     vty_out (vty, "BGP scan is running%s", VTY_NEWLINE);
@@ -1299,6 +1276,7 @@ show_ip_bgp_scan_tables (struct vty *vty, const char detail)
     vty_out (vty, "BGP scan is not running%s", VTY_NEWLINE);
   vty_out (vty, "BGP scan interval is %d%s", bgp_scan_interval, VTY_NEWLINE);
 
+#if BGP_SCAN_NEXTHOP
   vty_out (vty, "Current BGP nexthop cache:%s", VTY_NEWLINE);
   for (rn = bgp_table_top (bgp_nexthop_cache_table[AFI_IP]); rn; rn = bgp_route_next (rn))
     if ((bnc = rn->info) != NULL)
@@ -1363,7 +1341,9 @@ show_ip_bgp_scan_tables (struct vty *vty, const char detail)
 	}
   }
 #endif /* HAVE_IPV6 */
-
+#else
+  vty_out (vty, "BGP next-hop tracking is on%s", VTY_NEWLINE);
+#endif
   vty_out (vty, "BGP connected route:%s", VTY_NEWLINE);
   for (rn = bgp_table_top (bgp_connected_table[AFI_IP]); 
        rn; 
@@ -1388,6 +1368,117 @@ show_ip_bgp_scan_tables (struct vty *vty, const char detail)
   return CMD_SUCCESS;
 }
 
+static int
+show_ip_bgp_nexthop_table (struct vty *vty, int detail)
+{
+  struct bgp_node *rn;
+  struct bgp_nexthop_cache *bnc;
+  char buf[INET6_ADDRSTRLEN];
+  time_t tbuf;
+  u_char i;
+
+  vty_out (vty, "Current BGP nexthop cache:%s", VTY_NEWLINE);
+  for (rn = bgp_table_top (bgp_nexthop_cache_table[AFI_IP]); rn; rn = bgp_route_next (rn))
+    if ((bnc = rn->info) != NULL)
+      {
+	if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID))
+	{
+	  vty_out (vty, " %s valid [IGP metric %d], #paths %d%s",
+		   inet_ntop (AF_INET, &rn->p.u.prefix4, buf, INET6_ADDRSTRLEN),
+		   bnc->metric, bnc->path_count, VTY_NEWLINE);
+	  if (detail)
+	    for (i = 0; i < bnc->nexthop_num; i++)
+	      switch (bnc->nexthop[i].type)
+	      {
+	      case NEXTHOP_TYPE_IPV4:
+		vty_out (vty, "  gate %s%s",
+			 inet_ntop (AF_INET, &bnc->nexthop[i].gate.ipv4, buf,
+				    INET6_ADDRSTRLEN), VTY_NEWLINE);
+		break;
+	      case NEXTHOP_TYPE_IFINDEX:
+		vty_out (vty, "  if %s%s",
+			 ifindex2ifname(bnc->nexthop[i].ifindex), VTY_NEWLINE);
+		break;
+	      case NEXTHOP_TYPE_IPV4_IFINDEX:
+		vty_out (vty, "  gate %s, if %s%s",
+			 inet_ntop(AF_INET, &bnc->nexthop[i].gate.ipv4, buf,
+				   INET6_ADDRSTRLEN),
+			 ifindex2ifname(bnc->nexthop[i].ifindex), VTY_NEWLINE);
+		break;
+	      default:
+		vty_out (vty, "  invalid nexthop type %u%s",
+			 bnc->nexthop[i].type, VTY_NEWLINE);
+	      }
+	}
+	else
+	  vty_out (vty, " %s invalid%s",
+		   inet_ntop (AF_INET, &rn->p.u.prefix4, buf, INET6_ADDRSTRLEN), VTY_NEWLINE);
+#ifdef HAVE_CLOCK_MONOTONIC
+	tbuf = time(NULL) - (bgp_clock() - bnc->last_update);
+	vty_out (vty, "  Last update: %s", ctime(&tbuf));
+#else
+	vty_out (vty, "  Last update: %s", ctime(&bnc->uptime));
+#endif /* HAVE_CLOCK_MONOTONIC */
+
+	vty_out(vty, "%s", VTY_NEWLINE);
+      }
+
+#ifdef HAVE_IPV6
+  {
+    for (rn = bgp_table_top (bgp_nexthop_cache_table[AFI_IP6]);
+         rn;
+         rn = bgp_route_next (rn))
+      if ((bnc = rn->info) != NULL)
+	{
+	  if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID))
+	  {
+	    vty_out (vty, " %s valid [IGP metric %d]%s",
+		     inet_ntop (AF_INET6, &rn->p.u.prefix6, buf,
+				INET6_ADDRSTRLEN),
+		     bnc->metric, VTY_NEWLINE);
+	    if (detail)
+	      for (i = 0; i < bnc->nexthop_num; i++)
+		switch (bnc->nexthop[i].type)
+		{
+		case NEXTHOP_TYPE_IPV6:
+		  vty_out (vty, "  gate %s%s",
+			   inet_ntop (AF_INET6, &bnc->nexthop[i].gate.ipv6,
+				      buf, INET6_ADDRSTRLEN), VTY_NEWLINE);
+		  break;
+		case NEXTHOP_TYPE_IPV6_IFINDEX:
+		  vty_out(vty, "  gate %s, if %s%s",
+			  inet_ntop(AF_INET6, &bnc->nexthop[i].gate.ipv6, buf,
+				    INET6_ADDRSTRLEN),
+			  ifindex2ifname(bnc->nexthop[i].ifindex),
+			  VTY_NEWLINE);
+		  break;
+		case NEXTHOP_TYPE_IFINDEX:
+		  vty_out (vty, "  ifidx %u%s", bnc->nexthop[i].ifindex,
+			   VTY_NEWLINE);
+		  break;
+		default:
+		  vty_out (vty, "  invalid nexthop type %u%s",
+			   bnc->nexthop[i].type, VTY_NEWLINE);
+		}
+	  }
+	  else
+	    vty_out (vty, " %s invalid%s",
+		     inet_ntop (AF_INET6, &rn->p.u.prefix6, buf, INET6_ADDRSTRLEN),
+		     VTY_NEWLINE);
+#ifdef HAVE_CLOCK_MONOTONIC
+	  tbuf = time(NULL) - (bgp_clock() - bnc->last_update);
+	  vty_out (vty, "  Last update: %s", ctime(&tbuf));
+#else
+	  vty_out (vty, "  Last update: %s", ctime(&bnc->uptime));
+#endif /* HAVE_CLOCK_MONOTONIC */
+
+	  vty_out(vty, "%s", VTY_NEWLINE);
+	}
+  }
+#endif /* HAVE_IPV6 */
+  return CMD_SUCCESS;
+}
+
 DEFUN (show_ip_bgp_scan,
        show_ip_bgp_scan_cmd,
        "show ip bgp scan",
@@ -1411,6 +1502,28 @@ DEFUN (show_ip_bgp_scan_detail,
   return show_ip_bgp_scan_tables (vty, 1);
 }
 
+DEFUN (show_ip_bgp_nexthop,
+       show_ip_bgp_nexthop_cmd,
+       "show ip bgp nexthop",
+       SHOW_STR
+       IP_STR
+       BGP_STR
+       "BGP nexthop table\n")
+{
+  return show_ip_bgp_nexthop_table (vty, 0);
+}
+
+DEFUN (show_ip_bgp_nexthop_detail,
+       show_ip_bgp_nexthop_detail_cmd,
+       "show ip bgp nexthop detail",
+       SHOW_STR
+       IP_STR
+       BGP_STR
+       "BGP nexthop table\n")
+{
+  return show_ip_bgp_nexthop_table (vty, 1);
+}
+
 int
 bgp_config_write_scan_time (struct vty *vty)
 {
@@ -1453,8 +1566,12 @@ bgp_scan_init (void)
   install_element (BGP_NODE, &no_bgp_scan_time_val_cmd);
   install_element (VIEW_NODE, &show_ip_bgp_scan_cmd);
   install_element (VIEW_NODE, &show_ip_bgp_scan_detail_cmd);
+  install_element (VIEW_NODE, &show_ip_bgp_nexthop_cmd);
+  install_element (VIEW_NODE, &show_ip_bgp_nexthop_detail_cmd);
   install_element (RESTRICTED_NODE, &show_ip_bgp_scan_cmd);
   install_element (ENABLE_NODE, &show_ip_bgp_scan_cmd);
+  install_element (ENABLE_NODE, &show_ip_bgp_nexthop_cmd);
+  install_element (ENABLE_NODE, &show_ip_bgp_nexthop_detail_cmd);
   install_element (ENABLE_NODE, &show_ip_bgp_scan_detail_cmd);
 }
 
diff --git a/bgpd/bgp_nexthop.h b/bgpd/bgp_nexthop.h
index 6e5350e..abcf242 100644
--- a/bgpd/bgp_nexthop.h
+++ b/bgpd/bgp_nexthop.h
@@ -22,6 +22,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #define _QUAGGA_BGP_NEXTHOP_H
 
 #include "if.h"
+#include "queue.h"
+#include "prefix.h"
 
 #define BGP_SCAN_INTERVAL_DEFAULT   60
 #define BGP_IMPORT_INTERVAL_DEFAULT 15
@@ -44,6 +46,20 @@ struct bgp_nexthop_cache
   /* Nexthop number and nexthop linked list.*/
   u_char nexthop_num;
   struct nexthop *nexthop;
+  time_t last_update;
+  u_int16_t flags;
+
+#define BGP_NEXTHOP_VALID (1 << 0)
+#define BGP_NEXTHOP_REGISTERED (1 << 1)
+
+  u_int16_t change_flags;
+
+#define BGP_NEXTHOP_CHANGED (1 << 0)
+#define BGP_NEXTHOP_METRIC_CHANGED (1 << 1)
+
+  struct bgp_node *node;
+  LIST_HEAD(path_list, bgp_info) paths;
+  unsigned int path_count;
 };
 
 extern void bgp_scan_init (void);
@@ -57,5 +73,9 @@ extern int bgp_config_write_scan_time (struct vty *);
 extern int bgp_nexthop_onlink (afi_t, struct attr *);
 extern int bgp_nexthop_self (struct attr *);
 extern void bgp_address_init (void);
+extern struct bgp_nexthop_cache *bnc_new();
+extern void bnc_free(struct bgp_nexthop_cache *bnc);
+extern void bnc_nexthop_free(struct bgp_nexthop_cache *bnc);
+extern char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size);
 
 #endif /* _QUAGGA_BGP_NEXTHOP_H */
diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c
new file mode 100644
index 0000000..94b6145
--- /dev/null
+++ b/bgpd/bgp_nht.c
@@ -0,0 +1,467 @@
+/* BGP Nexthop tracking
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "command.h"
+#include "thread.h"
+#include "prefix.h"
+#include "zclient.h"
+#include "stream.h"
+#include "network.h"
+#include "log.h"
+#include "memory.h"
+#include "nexthop.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_table.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_nexthop.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_nht.h"
+
+extern struct zclient *zclient;
+extern struct bgp_table *bgp_nexthop_cache_table[AFI_MAX];
+
+static void register_nexthop(struct bgp_nexthop_cache *bnc);
+static void unregister_nexthop (struct bgp_nexthop_cache *bnc);
+static void evaluate_paths(struct bgp_nexthop_cache *bnc);
+static int make_prefix(int afi, struct bgp_info *ri, struct prefix *p);
+static void path_nh_map(struct bgp_info *path, struct bgp_nexthop_cache *bnc,
+			int keep);
+
+int
+bgp_find_nexthop (struct bgp_info *path, int *changed, int *metricchanged)
+{
+  struct bgp_nexthop_cache *bnc = path->nexthop;
+
+  if (!bnc)
+    return 0;
+
+  if (changed)
+    *changed = CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
+
+  if (metricchanged)
+    *metricchanged = CHECK_FLAG(bnc->change_flags,
+				BGP_NEXTHOP_METRIC_CHANGED);
+
+  return (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
+}
+
+void
+bgp_unlink_nexthop (struct bgp_info *path)
+{
+  struct bgp_nexthop_cache *bnc = path->nexthop;
+
+  if (!bnc)
+    return;
+
+  path_nh_map(path, NULL, 0);
+
+  if (LIST_EMPTY(&(bnc->paths)))
+    {
+      if (BGP_DEBUG(nht, NHT))
+	{
+	  char buf[INET6_ADDRSTRLEN];
+	  zlog_debug("bgp_unlink_nexthop: freeing bnc %s",
+		     bnc_str(bnc, buf, INET6_ADDRSTRLEN));
+	}
+      unregister_nexthop(bnc);
+      bnc->node->info = NULL;
+      bgp_unlock_node(bnc->node);
+      bnc_free(bnc);
+    }
+}
+
+int
+bgp_find_or_add_nexthop (afi_t afi, struct bgp_info *ri, int *changed,
+			 int *metricchanged)
+{
+  struct bgp_node *rn;
+  struct bgp_nexthop_cache *bnc;
+  struct prefix p;
+
+  if (make_prefix(afi, ri, &p) < 0)
+    return 1;
+  rn = bgp_node_get (bgp_nexthop_cache_table[afi], &p);
+
+  if (!rn->info)
+    {
+      bnc = bnc_new();
+      rn->info = bnc;
+      bnc->node = rn;
+      bgp_lock_node(rn);
+      register_nexthop(bnc);
+    }
+  bnc = rn->info;
+  bgp_unlock_node (rn);
+  path_nh_map(ri, bnc, 1);
+
+  if (changed)
+    *changed = CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
+
+  if (metricchanged)
+    *metricchanged = CHECK_FLAG(bnc->change_flags,
+				BGP_NEXTHOP_METRIC_CHANGED);
+
+  if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
+    (bgp_info_extra_get(ri))->igpmetric = bnc->metric;
+  else if (ri->extra)
+    ri->extra->igpmetric = 0;
+
+  return (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
+}
+
+void
+bgp_parse_nexthop_update (void)
+{
+  struct stream *s;
+  struct bgp_node *rn;
+  struct bgp_nexthop_cache *bnc;
+  struct nexthop *nexthop;
+  struct nexthop *oldnh;
+  struct nexthop *nhlist_head = NULL;
+  struct nexthop *nhlist_tail = NULL;
+  uint32_t metric;
+  u_char nexthop_num;
+  struct prefix p;
+  int i;
+
+  s = zclient->ibuf;
+
+  memset(&p, 0, sizeof(struct prefix));
+  p.family = stream_getw(s);
+  p.prefixlen = stream_getc(s);
+  switch (p.family)
+    {
+    case AF_INET:
+      p.u.prefix4.s_addr = stream_get_ipv4 (s);
+      break;
+    case AF_INET6:
+      stream_get(&p.u.prefix6, s, 16);
+      break;
+    default:
+      break;
+    }
+
+  rn = bgp_node_lookup(bgp_nexthop_cache_table[family2afi(p.family)], &p);
+  if (!rn || !rn->info)
+    {
+      if (BGP_DEBUG(nht, NHT))
+	{
+	  char buf[INET6_ADDRSTRLEN];
+	  prefix2str(&p, buf, INET6_ADDRSTRLEN);
+	  zlog_debug("parse nexthop update(%s): rn not found", buf);
+	}
+      return;
+    }
+
+  bnc = rn->info;
+  bnc->last_update = bgp_clock();
+  bnc->change_flags = 0;
+  metric = stream_getl (s);
+  nexthop_num = stream_getc (s);
+
+  /* debug print the input */
+  if (BGP_DEBUG(nht, NHT))
+    {
+      char buf[INET6_ADDRSTRLEN];
+      prefix2str(&p, buf, INET6_ADDRSTRLEN);
+      zlog_debug("parse nexthop update(%s): metric=%d, #nexthop=%d", buf,
+		 metric, nexthop_num);
+    }
+
+  if (metric != bnc->metric)
+    bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
+
+  if(nexthop_num != bnc->nexthop_num)
+    bnc->change_flags |= BGP_NEXTHOP_CHANGED;
+
+  if (nexthop_num)
+    {
+      bnc->flags |= BGP_NEXTHOP_VALID;
+      bnc->metric = metric;
+      bnc->nexthop_num = nexthop_num;
+
+      for (i = 0; i < nexthop_num; i++)
+	{
+	  nexthop = nexthop_new();
+	  nexthop->type = stream_getc (s);
+	  switch (nexthop->type)
+	    {
+	    case ZEBRA_NEXTHOP_IPV4:
+	      nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
+	      break;
+	    case ZEBRA_NEXTHOP_IFINDEX:
+	    case ZEBRA_NEXTHOP_IFNAME:
+	      nexthop->ifindex = stream_getl (s);
+	      break;
+            case ZEBRA_NEXTHOP_IPV4_IFINDEX:
+	    case ZEBRA_NEXTHOP_IPV4_IFNAME:
+	      nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
+	      nexthop->ifindex = stream_getl (s);
+	      break;
+#ifdef HAVE_IPV6
+            case ZEBRA_NEXTHOP_IPV6:
+	      stream_get (&nexthop->gate.ipv6, s, 16);
+	      break;
+            case ZEBRA_NEXTHOP_IPV6_IFINDEX:
+	    case ZEBRA_NEXTHOP_IPV6_IFNAME:
+	      stream_get (&nexthop->gate.ipv6, s, 16);
+	      nexthop->ifindex = stream_getl (s);
+	      break;
+#endif
+            default:
+              /* do nothing */
+              break;
+	    }
+
+	  if (nhlist_tail)
+	    {
+	      nhlist_tail->next = nexthop;
+	      nhlist_tail = nexthop;
+	    }
+	  else
+	    {
+	      nhlist_tail = nexthop;
+	      nhlist_head = nexthop;
+	    }
+
+	  /* No need to evaluate the nexthop if we have already determined
+	   * that there has been a change.
+	   */
+	  if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
+	    continue;
+
+	  for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
+	      if (nexthop_same(oldnh, nexthop))
+		  break;
+
+	  if (!oldnh)
+	    bnc->change_flags |= BGP_NEXTHOP_CHANGED;
+	}
+      bnc_nexthop_free(bnc);
+      bnc->nexthop = nhlist_head;
+    }
+  else
+    {
+      bnc->flags &= ~BGP_NEXTHOP_VALID;
+      bnc_nexthop_free(bnc);
+      bnc->nexthop = NULL;
+    }
+  evaluate_paths(bnc);
+}
+
+/**
+ * make_prefix - build a full-length (host) prefix from the path's BGP
+ * nexthop attribute. Returns -1 for an unusable IPv6 nexthop, else 0.
+ */
+static int
+make_prefix (int afi, struct bgp_info *ri, struct prefix *p)
+{
+  memset (p, 0, sizeof (struct prefix));
+  switch (afi)
+    {
+    case AFI_IP:
+      p->family = AF_INET;
+      p->prefixlen = IPV4_MAX_BITLEN;
+      p->u.prefix4 = ri->attr->nexthop;
+      break;
+#ifdef HAVE_IPV6
+    case AFI_IP6:
+      if (ri->attr->extra->mp_nexthop_len != 16
+	  || IN6_IS_ADDR_LINKLOCAL (&ri->attr->extra->mp_nexthop_global))
+	return -1;
+
+      p->family = AF_INET6;
+      p->prefixlen = IPV6_MAX_BITLEN;
+      p->u.prefix6 = ri->attr->extra->mp_nexthop_global;
+      break;
+#endif
+    default:
+      break;
+    }
+  return 0;
+}
+
+/**
+ * sendmsg_nexthop -- Format and send a nexthop register/Unregister
+ *   command to Zebra.
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ *   int command -- either ZEBRA_NEXTHOP_REGISTER or ZEBRA_NEXTHOP_UNREGISTER
+ * RETURNS:
+ *   void.
+ */
+static void
+sendmsg_nexthop (struct bgp_nexthop_cache *bnc, int command)
+{
+  struct stream *s;
+  struct prefix *p;
+  int ret;
+
+  /* Check socket. */
+  if (!zclient || zclient->sock < 0)
+    return;
+
+  p = &(bnc->node->p);
+  s = zclient->obuf;
+  stream_reset (s);
+  zclient_create_header (s, command);
+  stream_putw(s, PREFIX_FAMILY(p));
+  stream_putc(s, p->prefixlen);
+  switch (PREFIX_FAMILY(p))
+    {
+    case AF_INET:
+      stream_put_in_addr (s, &p->u.prefix4);
+      break;
+#ifdef HAVE_IPV6
+    case AF_INET6:
+      stream_put(s, &(p->u.prefix6), 16);
+      break;
+#endif
+    default:
+      break;
+    }
+  stream_putw_at (s, 0, stream_get_endp (s));
+
+  ret = zclient_send_message(zclient);
+  /* TBD: handle the failure */
+  if (ret < 0)
+    zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
+  return;
+}
+
+/**
+ * register_nexthop - register a nexthop with Zebra for notification
+ *    when the route to the nexthop changes.
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ *   void.
+ */
+static void
+register_nexthop (struct bgp_nexthop_cache *bnc)
+{
+  /* Check if we have already registered */
+  if (bnc->flags & BGP_NEXTHOP_REGISTERED)
+    return;
+  sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_REGISTER);
+  SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+}
+
+/**
+ * unregister_nexthop -- Unregister the nexthop from Zebra.
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ *   void.
+ */
+static void
+unregister_nexthop (struct bgp_nexthop_cache *bnc)
+{
+  /* Nothing to unregister if we are not currently registered */
+  if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
+    return;
+
+  sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_UNREGISTER);
+  UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+}
+
+/**
+ * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ *   void.
+ */
+static void
+evaluate_paths (struct bgp_nexthop_cache *bnc)
+{
+  struct bgp_node *rn;
+  struct bgp_info *path;
+  struct bgp *bgp = bgp_get_default();
+  int afi;
+
+  LIST_FOREACH(path, &(bnc->paths), nh_thread)
+    {
+      if (!(path->type == ZEBRA_ROUTE_BGP &&
+	    path->sub_type == BGP_ROUTE_NORMAL))
+	continue;
+
+      rn = path->net;
+      afi = family2afi(rn->p.family);
+
+      /* Path becomes valid/invalid depending on whether the nexthop
+       * is reachable/unreachable.
+       */
+      if ((CHECK_FLAG(path->flags, BGP_INFO_VALID) ? 1 : 0) !=
+	  (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) ? 1 : 0))
+	{
+	  if (CHECK_FLAG (path->flags, BGP_INFO_VALID))
+	    {
+	      bgp_aggregate_decrement (bgp, &rn->p, path,
+				       afi, SAFI_UNICAST);
+	      bgp_info_unset_flag (rn, path, BGP_INFO_VALID);
+	    }
+	  else
+	    {
+	      bgp_info_set_flag (rn, path, BGP_INFO_VALID);
+	      bgp_aggregate_increment (bgp, &rn->p, path,
+				       afi, SAFI_UNICAST);
+	    }
+	}
+
+      /* Copy the metric to the path. Will be used for bestpath computation */
+      if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
+	(bgp_info_extra_get(path))->igpmetric = bnc->metric;
+      else if (path->extra)
+	path->extra->igpmetric = 0;
+      bgp_process(bgp, rn, afi, SAFI_UNICAST);
+    }
+  RESET_FLAG(bnc->change_flags);
+}
+
+/**
+ * path_nh_map - make or break path-to-nexthop association.
+ * ARGUMENTS:
+ *   path - pointer to the path structure
+ *   bnc - pointer to the nexthop structure
+ *   make - if set, make the association. if unset, just break the existing
+ *          association.
+ */
+static void
+path_nh_map (struct bgp_info *path, struct bgp_nexthop_cache *bnc, int make)
+{
+  if (path->nexthop)
+    {
+      LIST_REMOVE(path, nh_thread);
+      path->nexthop->path_count--;
+      path->nexthop = NULL;
+    }
+  if (make)
+    {
+      LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
+      path->nexthop = bnc;
+      path->nexthop->path_count++;
+    }
+}
diff --git a/bgpd/bgp_nht.h b/bgpd/bgp_nht.h
new file mode 100644
index 0000000..0e43f0a
--- /dev/null
+++ b/bgpd/bgp_nht.h
@@ -0,0 +1,62 @@
+/* BGP Nexthop tracking
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _BGP_NHT_H
+#define _BGP_NHT_H
+
+/**
+ * bgp_parse_nexthop_update() - parse a nexthop update message from Zebra.
+ */
+extern void bgp_parse_nexthop_update(void);
+
+/**
+ * bgp_find_nexthop() - look up the nexthop cache (bnc) object for a path.
+ * ARGUMENTS:
+ *   p - path for which the nexthop object is being looked up
+ *   c - output: set to indicate whether the nexthop object has changed
+ *        since last time.
+ *   m - output: set to indicate whether the nexthop metric has changed
+ *        since last time.
+ */
+extern int bgp_find_nexthop(struct bgp_info *p, int *c, int *m);
+
+/**
+ * bgp_find_or_add_nexthop() - look up the nexthop cache (bnc) object for a
+ *  path. If not found, create a new object and register with ZEBRA for
+ *  nexthop notification.
+ * ARGUMENTS:
+ *   a - afi: AFI_IP or AFI_IP6
+ *   p - path for which the nexthop object is being looked up
+ *   c - output variable that stores whether the nexthop object has changed
+ *        since last time (bgp_update_main() passes NULL).
+ *   m - output variable that stores whether the nexthop metric has changed
+ *        since last time (bgp_update_main() passes NULL).
+ */
+extern int bgp_find_or_add_nexthop(afi_t a, struct bgp_info *p, int *c, int *m);
+
+/**
+ * bgp_unlink_nexthop() - Unlink the nexthop object from the path structure.
+ * ARGUMENTS:
+ *   p - path structure; bgp_info_free() calls this on the path it tears down.
+ */
+extern void bgp_unlink_nexthop(struct bgp_info *p);
+
+#endif /* _BGP_NHT_H */
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index 058c0dc..4c30bbe 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -55,6 +55,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #include "bgpd/bgp_zebra.h"
 #include "bgpd/bgp_vty.h"
 #include "bgpd/bgp_mpath.h"
+#include "bgpd/bgp_nht.h"
 
 /* Extern from bgp_dump.c */
 extern const char *bgp_origin_str[];
@@ -126,20 +127,14 @@ bgp_info_extra_get (struct bgp_info *ri)
   return ri->extra;
 }
 
-/* Allocate new bgp info structure. */
-static struct bgp_info *
-bgp_info_new (void)
-{
-  return XCALLOC (MTYPE_BGP_ROUTE, sizeof (struct bgp_info));
-}
-
 /* Free bgp route information. */
 static void
 bgp_info_free (struct bgp_info *binfo)
 {
   if (binfo->attr)
     bgp_attr_unintern (&binfo->attr);
-  
+
+  bgp_unlink_nexthop(binfo);
   bgp_info_extra_free (&binfo->extra);
   bgp_info_mpath_free (&binfo->mpath);
 
@@ -1871,6 +1866,23 @@ bgp_rib_withdraw (struct bgp_node *rn, struct bgp_info *ri, struct peer *peer,
   bgp_rib_remove (rn, ri, peer, afi, safi);
 }
 
+/* Allocate a bgp_info for node RN and fill in its basic fields. */
+static struct bgp_info *
+info_make (int type, int sub_type, struct peer *peer, struct attr *attr,
+	   struct bgp_node *rn)
+{
+  struct bgp_info *ri = XCALLOC (MTYPE_BGP_ROUTE, sizeof (*ri));
+
+  ri->net = rn;
+  ri->peer = peer;
+  ri->attr = attr;
+  ri->type = type;
+  ri->sub_type = sub_type;
+  ri->uptime = bgp_clock ();
+
+  return ri;
+}
+
 static void
 bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi,
       struct attr *attr, struct peer *peer, struct prefix *p, int type,
@@ -2018,13 +2030,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi,
               p->prefixlen, rsclient->host);
     }
 
-  /* Make new BGP info. */
-  new = bgp_info_new ();
-  new->type = type;
-  new->sub_type = sub_type;
-  new->peer = peer;
-  new->attr = attr_new;
-  new->uptime = bgp_clock ();
+  new = info_make(type, sub_type, peer, attr_new, rn);
 
   /* Update MPLS tag. */
   if (safi == SAFI_MPLS_VPN)
@@ -2324,7 +2330,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, struct attr *attr,
 	      || (peer->sort == BGP_PEER_EBGP && peer->ttl != 1)
 	      || CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)))
 	{
-	  if (bgp_nexthop_lookup (afi, peer, ri, NULL, NULL))
+	  if (bgp_find_or_add_nexthop (afi, ri, NULL, NULL))
 	    bgp_info_set_flag (rn, ri, BGP_INFO_VALID);
 	  else
 	    bgp_info_unset_flag (rn, ri, BGP_INFO_VALID);
@@ -2351,12 +2357,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, struct attr *attr,
     }
 
   /* Make new BGP info. */
-  new = bgp_info_new ();
-  new->type = type;
-  new->sub_type = sub_type;
-  new->peer = peer;
-  new->attr = attr_new;
-  new->uptime = bgp_clock ();
+  new = info_make(type, sub_type, peer, attr_new, rn);
 
   /* Update MPLS tag. */
   if (safi == SAFI_MPLS_VPN)
@@ -2370,7 +2371,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, struct attr *attr,
 	  || (peer->sort == BGP_PEER_EBGP && peer->ttl != 1)
 	  || CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)))
     {
-      if (bgp_nexthop_lookup (afi, peer, new, NULL, NULL))
+      if (bgp_find_or_add_nexthop (afi, new, NULL, NULL))
 	bgp_info_set_flag (rn, new, BGP_INFO_VALID);
       else
         bgp_info_unset_flag (rn, new, BGP_INFO_VALID);
@@ -3458,15 +3459,11 @@ bgp_static_update_rsclient (struct peer *rsclient, struct prefix *p,
           return;
         }
     }
-  
+
   /* Make new BGP info. */
-  new = bgp_info_new ();
-  new->type = ZEBRA_ROUTE_BGP;
-  new->sub_type = BGP_ROUTE_STATIC;
-  new->peer = bgp->peer_self;
+  new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, bgp->peer_self,
+		  attr_new, rn);
   SET_FLAG (new->flags, BGP_INFO_VALID);
-  new->attr = attr_new;
-  new->uptime = bgp_clock ();
 
   /* Register new BGP information. */
   bgp_info_add (rn, new);
@@ -3579,13 +3576,9 @@ bgp_static_update_main (struct bgp *bgp, struct prefix *p,
     }
 
   /* Make new BGP info. */
-  new = bgp_info_new ();
-  new->type = ZEBRA_ROUTE_BGP;
-  new->sub_type = BGP_ROUTE_STATIC;
-  new->peer = bgp->peer_self;
+  new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, bgp->peer_self, attr_new,
+		  rn);
   SET_FLAG (new->flags, BGP_INFO_VALID);
-  new->attr = attr_new;
-  new->uptime = bgp_clock ();
 
   /* Aggregate address increment. */
   bgp_aggregate_increment (bgp, p, new, afi, safi);
@@ -3630,13 +3623,10 @@ bgp_static_update_vpnv4 (struct bgp *bgp, struct prefix *p, afi_t afi,
   rn = bgp_afi_node_get (bgp->rib[afi][safi], afi, safi, p, prd);
 
   /* Make new BGP info. */
-  new = bgp_info_new ();
-  new->type = ZEBRA_ROUTE_BGP;
-  new->sub_type = BGP_ROUTE_STATIC;
-  new->peer = bgp->peer_self;
-  new->attr = bgp_attr_default_intern (BGP_ORIGIN_IGP);
+  new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, bgp->peer_self,
+		  bgp_attr_default_intern(BGP_ORIGIN_IGP), rn);
+
   SET_FLAG (new->flags, BGP_INFO_VALID);
-  new->uptime = bgp_clock ();
   new->extra = bgp_info_extra_new();
   memcpy (new->extra->tag, tag, 3);
 
@@ -4708,13 +4698,10 @@ bgp_aggregate_route (struct bgp *bgp, struct prefix *p, struct bgp_info *rinew,
   if (aggregate->count > 0)
     {
       rn = bgp_node_get (table, p);
-      new = bgp_info_new ();
-      new->type = ZEBRA_ROUTE_BGP;
-      new->sub_type = BGP_ROUTE_AGGREGATE;
-      new->peer = bgp->peer_self;
+      new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_AGGREGATE, bgp->peer_self,
+		      bgp_attr_aggregate_intern(bgp, origin, aspath, community,
+						aggregate->as_set), rn);
       SET_FLAG (new->flags, BGP_INFO_VALID);
-      new->attr = bgp_attr_aggregate_intern (bgp, origin, aspath, community, aggregate->as_set);
-      new->uptime = bgp_clock ();
 
       bgp_info_add (rn, new);
       bgp_unlock_node (rn);
@@ -4892,14 +4879,10 @@ bgp_aggregate_add (struct bgp *bgp, struct prefix *p, afi_t afi, safi_t safi,
   if (aggregate->count)
     {
       rn = bgp_node_get (table, p);
-
-      new = bgp_info_new ();
-      new->type = ZEBRA_ROUTE_BGP;
-      new->sub_type = BGP_ROUTE_AGGREGATE;
-      new->peer = bgp->peer_self;
+      new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_AGGREGATE, bgp->peer_self,
+		      bgp_attr_aggregate_intern(bgp, origin, aspath, community,
+						aggregate->as_set), rn);
       SET_FLAG (new->flags, BGP_INFO_VALID);
-      new->attr = bgp_attr_aggregate_intern (bgp, origin, aspath, community, aggregate->as_set);
-      new->uptime = bgp_clock ();
 
       bgp_info_add (rn, new);
       bgp_unlock_node (rn);
@@ -5539,16 +5522,12 @@ bgp_redistribute_add (struct prefix *p, const struct in_addr *nexthop,
  		  aspath_unintern (&attr.aspath);
  		  bgp_attr_extra_free (&attr);
  		  return;
- 		} 
+		}
  	    }
 
-	  new = bgp_info_new ();
-	  new->type = type;
-	  new->sub_type = BGP_ROUTE_REDISTRIBUTE;
-	  new->peer = bgp->peer_self;
+	  new = info_make(type, BGP_ROUTE_REDISTRIBUTE, bgp->peer_self,
+			  new_attr, bn);
 	  SET_FLAG (new->flags, BGP_INFO_VALID);
-	  new->attr = new_attr;
-	  new->uptime = bgp_clock ();
 
 	  bgp_aggregate_increment (bgp, p, new, afi, SAFI_UNICAST);
 	  bgp_info_add (bn, new);
diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h
index fea18dd..f8cc34f 100644
--- a/bgpd/bgp_route.h
+++ b/bgpd/bgp_route.h
@@ -21,8 +21,11 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #ifndef _QUAGGA_BGP_ROUTE_H
 #define _QUAGGA_BGP_ROUTE_H
 
+#include "queue.h"
 #include "bgp_table.h"
 
+struct bgp_nexthop_cache;
+
 /* Ancillary information to struct bgp_info, 
  * used for uncommonly used data (aggregation, MPLS, etc.)
  * and lazily allocated to save memory.
@@ -47,7 +50,16 @@ struct bgp_info
   /* For linked list. */
   struct bgp_info *next;
   struct bgp_info *prev;
-  
+
+  /* For nexthop linked list */
+  LIST_ENTRY(bgp_info) nh_thread;
+
+  /* Back pointer to the prefix node */
+  struct bgp_node *net;
+
+  /* Back pointer to the nexthop structure */
+  struct bgp_nexthop_cache *nexthop;
+
   /* Peer structure.  */
   struct peer *peer;
 
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index d6ae375..0ae804d 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -38,6 +38,7 @@ Boston, MA 02111-1307, USA.  */
 #include "bgpd/bgp_fsm.h"
 #include "bgpd/bgp_debug.h"
 #include "bgpd/bgp_mpath.h"
+#include "bgpd/bgp_nexthop.h"
 
 /* All information about zebra. */
 struct zclient *zclient = NULL;
@@ -75,6 +76,15 @@ bgp_router_id_update (int command, struct zclient *zclient, zebra_size_t length)
   return 0;
 }
 
+/* ZEBRA_NEXTHOP_UPDATE handler: hand the message to the NHT parser. */
+static int
+bgp_read_nexthop_update (int command, struct zclient *zclient,
+			 zebra_size_t length)
+{
+  bgp_parse_nexthop_update();
+  return 0;
+}
+
 /* Inteface addition message from zebra. */
 static int
 bgp_interface_add (int command, struct zclient *zclient, zebra_size_t length)
@@ -1187,6 +1197,7 @@ bgp_zebra_init (void)
   zclient->ipv6_route_add = zebra_read_ipv6;
   zclient->ipv6_route_delete = zebra_read_ipv6;
 #endif /* HAVE_IPV6 */
+  zclient->nexthop_update = bgp_read_nexthop_update;
 
   /* Interface related init. */
   if_init ();
diff --git a/doc/next-hop-tracking.txt b/doc/next-hop-tracking.txt
new file mode 100644
index 0000000..c2e57ca
--- /dev/null
+++ b/doc/next-hop-tracking.txt
@@ -0,0 +1,326 @@
+0. Introduction
+
+This is the design specification for the next hop tracking feature in
+Quagga.
+
+1. Background
+
+Recursive routes are of the form:
+
+   p/m --> n
+  [Ex: 1.1.0.0/16 --> 2.2.2.2]
+
+where 'n' itself is resolved through another route as follows:
+
+   p2/m --> h, interface
+  [Ex: 2.2.2.0/24 --> 3.3.3.3, eth0]
+
+Usually, BGP routes are recursive in nature and BGP nexthops get
+resolved through an IGP route. IGP usually adds its routes pointing to
+an interface (these are called non-recursive routes).
+
+When BGP receives a recursive route from a peer, it needs to validate
+the nexthop. The path is marked valid or invalid based on the
+reachability status of the nexthop.  Nexthop validation is also
+important for the BGP decision process, as the metric to reach the
+nexthop is a parameter of the best path selection process.
+
+As it goes with routing, this is a dynamic process. Route to the
+nexthop can change. The nexthop can become unreachable or
+reachable. In the current BGP implementation, the nexthop validation
+is done periodically in the scanner run. The default scanner run
+interval is one minute. Every minute, the scanner task walks the
+entire BGP table. It checks the validity of each nexthop with Zebra
+(the routing table manager) through a request and response message
+exchange between BGP and Zebra process. BGP process is blocked for
+that duration. The mechanism has two major drawbacks:
+
+(1) The scanner task runs to completion. That can potentially starve
+    the other tasks for long periods of time, based on the BGP table
+    size and number of nexthops.
+
+(2) Convergence around routing changes that affect the nexthops can be
+    long (around a minute with the default intervals). The interval
+    can be shortened to achieve faster reaction time, but it makes the
+    first problem worse, with the scanner task consuming most of the
+    CPU resources.
+
+The "next hop tracking" feature makes this process event-driven. It
+eliminates periodic nexthop validation and introduces an asynchronous
+communication path between BGP and Zebra for route change notifications
+that can then be acted upon.
+
+2. Goal
+
+Stating the obvious, the main goal is to remove the two limitations we
+discussed in the previous section. The goals, in a constructive tone,
+are the following:
+
+- fairness: the scanner run should not consume an unjustly high amount
+  of CPU time. This should give an overall good performance and
+  response time to other events (route changes, session events,
+  IO/user interface).
+
+- convergence: BGP must react to nexthop changes instantly and provide
+  sub-second convergence. This may involve diverting the routes from
+  one nexthop to another.
+
+3. Overview of the changes
+
+The changes are in both BGP and Zebra modules.  The short summary is
+the following:
+
+- Zebra implements a registration mechanism by which clients can
+   register for next hop notification. Consequently, it maintains a
+   separate table, per (VRF, AF) pair, of next hops and interested
+   client-list per next hop.
+
+- When the main routing table changes in Zebra, it evaluates the next
+   hop table: for each next hop, it checks if the route table
+   modifications have changed its state. If so, it notifies the
+   interested clients.
+
+- BGP is one such client. It registers the next hops corresponding to
+   all of its received routes/paths. It also threads the paths against
+   each nexthop structure.
+
+- When BGP receives a next hop notification from Zebra, it walks the
+   corresponding path list. It makes them valid or invalid depending
+   on the next hop notification. It then re-computes best path for the
+   corresponding destination. This may result in re-announcing those
+   destinations to peers.
+
+4. Design
+
+4.1. Modules
+
+The core design introduces an "nht" (next hop tracking) module in BGP
+and "rnh" (recursive nexthop) module in Zebra. The "nht" module
+provides the following APIs:
+
+bgp_find_or_add_nexthop() : find or add a nexthop in BGP nexthop table
+bgp_find_nexthop() : find a nexthop in BGP nexthop table
+bgp_parse_nexthop_update() : parse a nexthop update message coming
+                              from zebra
+
+The "rnh" module provides the following APIs:
+
+zebra_add_rnh() : add a recursive nexthop
+zebra_delete_rnh() : delete a recursive nexthop
+zebra_lookup_rnh() : lookup a recursive nexthop
+
+zebra_add_rnh_client() : register a client for nexthop notifications
+                         against a recursive nexthop
+
+zebra_remove_rnh_client(): remove the client registration for a
+                            recursive nexthop
+
+zebra_evaluate_rnh_table(): (re)evaluate the recursive nexthop table
+                            (most probably because the main routing
+                            table has changed).
+
+zebra_cleanup_rnh_client(): Cleanup a client from the "rnh" module
+                            data structures (most probably because the
+                            client is going away).
+
+4.2. Control flow
+
+The next hop registration control flow is the following:
+
+<====      BGP Process       ====>|<====      Zebra Process      ====>
+                                  |
+receive module     nht module     |  zserv module        rnh module
+----------------------------------------------------------------------
+              |                   |                  |
+bgp_update_   |                   |                  |
+      main()  | bgp_find_or_add_  |                  |
+              |        nexthop()  |                  |
+              |                   |                  |
+              |                   | zserv_nexthop_   |
+              |                   |       register() |
+              |                   |                  | zebra_add_rnh()
+              |                   |                  |
+
+
+The next hop notification control flow is the following:
+
+<====     Zebra Process    ====>|<====      BGP Process       ====>
+                                |
+rib module         rnh module   |     zebra module        nht module
+----------------------------------------------------------------------
+              |                 |                   |
+meta_queue_   |                 |                   |
+    process() | zebra_evaluate_ |                   |
+              |     rnh_table() |                   |
+              |                 |                   |
+              |                 | bgp_read_nexthop_ |
+              |                 |          update() |
+              |                 |                   | bgp_parse_
+              |                 |                   | nexthop_update()
+              |                 |                   |
+
+
+4.3. zclient message format
+
+ZEBRA_NEXTHOP_REGISTER and ZEBRA_NEXTHOP_UNREGISTER messages are
+encoded in the following way:
+
+/*
+ *     0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |     AF                        |  prefix len   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * .      Nexthop prefix                                           .
+ * .                                                               .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * .                                                               .
+ * .                                                               .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |     AF                        |  prefix len   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * .      Nexthop prefix                                           .
+ * .                                                               .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+ZEBRA_NEXTHOP_UPDATE message is encoded as follows:
+
+/*
+ *     0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |     AF                        |  prefix len   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * .      Nexthop prefix getting resolved                          .
+ * .                                                               .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |        metric                                                 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  #nexthops    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | nexthop type  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * .      resolving Nexthop details                                .
+ * .                                                               .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * .                                                               .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | nexthop type  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * .      resolving Nexthop details                                .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+4.4. BGP data structure
+
+Legend:
+
+/\   struct bgp_node: a BGP destination/route/prefix
+\/
+
+[ ]  struct bgp_info: a BGP path (e.g. route received from a peer)
+
+ _
+(_)  struct bgp_nexthop_cache: a BGP nexthop
+
+
+
+   /\         NULL
+   \/--+        ^
+       |        :
+       +--[ ]--[ ]--[ ]--> NULL
+   /\           :
+   \/--+        :
+       |        :
+       +--[ ]--[ ]--> NULL
+                :
+  _             :
+ (_).............
+
+
+4.5. Zebra data structure
+
+rnh table:
+
+           O
+          / \
+         O   O
+            / \
+           O   O
+
+        struct rnh
+        {
+          u_char flags;
+          struct rib *state;
+          struct list *client_list;
+          struct route_node *node;
+        };
+
+5. User interface changes
+
+quagga# show ip nht
+3.3.3.3
+ resolved via kernel
+ via 11.0.0.6, swp1
+ Client list: bgp(fd 12)
+11.0.0.10
+ resolved via connected
+ is directly connected, swp2
+ Client list: bgp(fd 12)
+11.0.0.18
+ resolved via connected
+ is directly connected, swp4
+ Client list: bgp(fd 12)
+11.11.11.11
+ resolved via kernel
+ via 10.0.1.2, eth0
+ Client list: bgp(fd 12)
+
+quagga# show ip bgp nexthop
+Current BGP nexthop cache:
+ 3.3.3.3 valid [IGP metric 0], #paths 3
+  Last update: Wed Oct 16 04:43:49 2013
+
+ 11.0.0.10 valid [IGP metric 1], #paths 1
+  Last update: Wed Oct 16 04:43:51 2013
+
+ 11.0.0.18 valid [IGP metric 1], #paths 2
+  Last update: Wed Oct 16 04:43:47 2013
+
+ 11.11.11.11 valid [IGP metric 0], #paths 1
+  Last update: Wed Oct 16 04:43:47 2013
+
+quagga# show ipv6 nht
+quagga# show ip bgp nexthop detail
+
+quagga# debug bgp nht
+quagga# debug zebra nht
+
+6. Sample test cases
+
+     r2----r3
+    /  \  /
+  r1----r4
+
+- Verify that a change in IGP cost triggers NHT
+  + shutdown the r1-r4 and r2-r4 links
+  + no shut the r1-r4 and r2-r4 links and wait for OSPF to come back
+    up
+  + We should be back to the original nexthop via r4 now
+- Verify that a NH becoming unreachable triggers NHT
+  + Shutdown all links to r4
+- Verify that a NH becoming reachable triggers NHT
+  + no shut all links to r4
+
+7. Future work
+
+- route-policy for next hop validation (e.g. ignore default route)
+- damping for rapid next hop changes
+- prioritized handling of nexthop changes ((un)reachability vs. metric
+  changes)
+- handling recursion loop, e.g.
+   11.11.11.11/32 -> 12.12.12.12
+   12.12.12.12/32 -> 11.11.11.11
+   11.0.0.0/8 -> <interface>
+- better statistics
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 9ab0daf..2c83e6b 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -12,7 +12,7 @@ libzebra_la_SOURCES = \
 	sockunion.c prefix.c thread.c if.c memory.c buffer.c table.c hash.c \
 	filter.c routemap.c distribute.c stream.c str.c log.c plist.c \
 	zclient.c sockopt.c smux.c agentx.c snmp.c md5.c if_rmap.c keychain.c privs.c \
-	sigevent.c pqueue.c jhash.c memtypes.c workqueue.c timerwheel.c
+	sigevent.c pqueue.c jhash.c memtypes.c workqueue.c timerwheel.c nexthop.c
 
 BUILT_SOURCES = memtypes.h route_types.h gitversion.h
 
@@ -27,7 +27,7 @@ pkginclude_HEADERS = \
 	str.h stream.h table.h thread.h vector.h version.h vty.h zebra.h \
 	plist.h zclient.h sockopt.h smux.h md5.h if_rmap.h keychain.h \
 	privs.h sigevent.h pqueue.h jhash.h zassert.h memtypes.h \
-	workqueue.h route_types.h libospf.h timerwheel.h
+	workqueue.h route_types.h libospf.h timerwheel.h nexthop.h
 
 EXTRA_DIST = \
 	regex.c regex-gnu.h \
diff --git a/lib/log.c b/lib/log.c
index e4ec7c2..5ae3872 100644
--- a/lib/log.c
+++ b/lib/log.c
@@ -845,6 +845,9 @@ static const struct zebra_desc_table command_types[] = {
   DESC_ENTRY	(ZEBRA_ROUTER_ID_DELETE),
   DESC_ENTRY	(ZEBRA_ROUTER_ID_UPDATE),
   DESC_ENTRY	(ZEBRA_HELLO),
+  DESC_ENTRY	(ZEBRA_NEXTHOP_REGISTER),
+  DESC_ENTRY	(ZEBRA_NEXTHOP_UNREGISTER),
+  DESC_ENTRY	(ZEBRA_NEXTHOP_UPDATE),
 };
 #undef DESC_ENTRY
 
diff --git a/lib/memtypes.c b/lib/memtypes.c
index 62a1481..99bdca1 100644
--- a/lib/memtypes.c
+++ b/lib/memtypes.c
@@ -85,6 +85,7 @@ struct memory_list memory_list_zebra[] =
   { MTYPE_STATIC_IPV6,		"Static IPv6 route"		},
   { MTYPE_RIB_DEST,		"RIB destination"		},
   { MTYPE_RIB_TABLE_INFO,	"RIB table info"		},
+  { MTYPE_RNH,		        "Nexthop tracking object"	},
   { -1, NULL },
 };
 
diff --git a/lib/nexthop.c b/lib/nexthop.c
new file mode 100644
index 0000000..213ebfd
--- /dev/null
+++ b/lib/nexthop.c
@@ -0,0 +1,109 @@
+/* A generic nexthop structure
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+#include <zebra.h>
+
+#include "prefix.h"
+#include "table.h"
+#include "memory.h"
+#include "str.h"
+#include "command.h"
+#include "if.h"
+#include "log.h"
+#include "sockunion.h"
+#include "linklist.h"
+#include "thread.h"
+#include "prefix.h"
+#include "nexthop.h"
+
+/* Free a nexthop, together with its ifname string when one is set. */
+void
+nexthop_free (struct nexthop *nexthop)
+{
+  if (nexthop->ifname)
+    XFREE (0, nexthop->ifname);
+  XFREE (MTYPE_NEXTHOP, nexthop);
+}
+
+/*
+ * nexthop_same - compare two nexthops for equality.
+ *
+ * Returns 1 when NEXT1 and NEXT2 have the same type and, depending on that
+ * type, the same gateway address and/or interface index; 0 otherwise.
+ * For the IPv4 gateway types the ifindex is compared only when NEXT1 has a
+ * non-zero ifindex.  Types without an explicit case compare equal on type
+ * alone.
+ */
+int
+nexthop_same (struct nexthop *next1, struct nexthop *next2)
+{
+  if (next1->type != next2->type)
+    return 0;
+
+  switch (next1->type)
+    {
+    case NEXTHOP_TYPE_IPV4:
+    case NEXTHOP_TYPE_IPV4_IFINDEX:
+    case NEXTHOP_TYPE_IPV4_IFINDEX_OL:
+      /* The ifindex only takes part when NEXT1 has one set. */
+      return (IPV4_ADDR_SAME (&next1->gate.ipv4, &next2->gate.ipv4)
+	      && (! next1->ifindex || next1->ifindex == next2->ifindex));
+    case NEXTHOP_TYPE_IFINDEX:
+    case NEXTHOP_TYPE_IFNAME:
+      /* Interface-only nexthops are identified by the ifindex. */
+      return next1->ifindex == next2->ifindex;
+#ifdef HAVE_IPV6
+    case NEXTHOP_TYPE_IPV6:
+      return IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6) ? 1 : 0;
+    case NEXTHOP_TYPE_IPV6_IFINDEX:
+    case NEXTHOP_TYPE_IPV6_IFNAME:
+      return (IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6)
+	      && next1->ifindex == next2->ifindex);
+#endif /* HAVE_IPV6 */
+    default:
+      /* Nothing beyond the type to compare. */
+      return 1;
+    }
+}
+
+/* nexthop_type_to_str - printable description of NH_TYPE.  The table is
+ * indexed by enum nexthop_types_t values; slot 0 is the "none" entry. */
+const char *
+nexthop_type_to_str (enum nexthop_types_t nh_type)
+{
+  static const char *desc[] = {
+    "none",
+    "Directly connected",
+    "Interface route",
+    "IPv4 nexthop",
+    "IPv4 nexthop with ifindex",
+    "IPv4 nexthop with ifname",
+    "IPv6 nexthop",
+    "IPv6 nexthop with ifindex",
+    "IPv6 nexthop with ifname",
+    "Null0 nexthop",
+    "IPv4 nexthop with ifindex (onlink)",
+  };
+
+  if (nh_type >= ZEBRA_NUM_OF (desc))
+    return "<Invalid nh type>";
+
+  return desc[nh_type];
+}
diff --git a/lib/nexthop.h b/lib/nexthop.h
new file mode 100644
index 0000000..dd4bfe1
--- /dev/null
+++ b/lib/nexthop.h
@@ -0,0 +1,90 @@
+/*
+ * Nexthop structure definition.
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _LIB_NEXTHOP_H
+#define _LIB_NEXTHOP_H
+
+#include "prefix.h"
+
+union g_addr {	/* one address: IPv4, or IPv6 when HAVE_IPV6 is defined */
+  struct in_addr ipv4;
+#ifdef HAVE_IPV6
+  struct in6_addr ipv6;
+#endif /* HAVE_IPV6 */
+};
+
+/* Nexthop kinds; nexthop_type_to_str() in nexthop.c indexes by these. */
+enum nexthop_types_t
+{
+  NEXTHOP_TYPE_IFINDEX = 1,      /* Directly connected.  */
+  NEXTHOP_TYPE_IFNAME,           /* Interface route.  */
+  NEXTHOP_TYPE_IPV4,             /* IPv4 nexthop.  */
+  NEXTHOP_TYPE_IPV4_IFINDEX,     /* IPv4 nexthop with ifindex.  */
+  NEXTHOP_TYPE_IPV4_IFNAME,      /* IPv4 nexthop with ifname.  */
+  NEXTHOP_TYPE_IPV6,             /* IPv6 nexthop.  */
+  NEXTHOP_TYPE_IPV6_IFINDEX,     /* IPv6 nexthop with ifindex.  */
+  NEXTHOP_TYPE_IPV6_IFNAME,      /* IPv6 nexthop with ifname.  */
+  NEXTHOP_TYPE_BLACKHOLE,        /* Null0 nexthop.  */
+  NEXTHOP_TYPE_IPV4_IFINDEX_OL,
+  /* As IPV4_IFINDEX, but the gate address need not belong to a connected
+   * network of the given interface.  Used for mesh routing. */
+};
+
+/* A single nexthop; entries are chained through next/prev. */
+struct nexthop
+{
+  struct nexthop *next;
+  struct nexthop *prev;
+
+  /* Interface name (released by nexthop_free()) and interface index. */
+  char *ifname;
+  unsigned int ifindex;
+
+  enum nexthop_types_t type;
+
+  u_char flags;
+#define NEXTHOP_FLAG_ACTIVE     (1 << 0) /* This nexthop is alive. */
+#define NEXTHOP_FLAG_FIB        (1 << 1) /* FIB nexthop. */
+#define NEXTHOP_FLAG_RECURSIVE  (1 << 2) /* Recursive nexthop. */
+#define NEXTHOP_FLAG_MATCHED    (1 << 3) /* Already matched vs a nexthop */
+
+  /* Nexthop (gateway) address. */
+  union g_addr gate;
+
+  /* Recursive lookup nexthop. */
+  u_char rtype;
+  unsigned int rifindex;
+  union g_addr rgate;
+  union g_addr src;
+};
+
+/* Allocate a nexthop with XCALLOC.  Written as a plain expression rather
+ * than a GNU statement expression, so it does not depend on GCC extensions;
+ * the result is the new struct nexthop pointer, as before. */
+#define nexthop_new() \
+  ((struct nexthop *) XCALLOC (MTYPE_NEXTHOP, sizeof (struct nexthop)))
+
+extern void nexthop_free(struct nexthop *nh);
+extern int nexthop_same(struct nexthop *nh1, struct nexthop *nh2);
+extern const char *nexthop_type_to_str (enum nexthop_types_t nh_type);
+
+#endif /*_LIB_NEXTHOP_H */
diff --git a/lib/zclient.c b/lib/zclient.c
index 856fea5..5ac6651 100644
--- a/lib/zclient.c
+++ b/lib/zclient.c
@@ -999,6 +999,12 @@ zclient_read (struct thread *thread)
       if (zclient->ipv6_route_delete)
 	(*zclient->ipv6_route_delete) (command, zclient, length);
       break;
+    case ZEBRA_NEXTHOP_UPDATE:
+      if (zclient_debug)
+	zlog_debug("zclient rcvd nexthop update");
+      if (zclient->nexthop_update)
+	(*zclient->nexthop_update) (command, zclient, length);
+      break;
     default:
       break;
     }
diff --git a/lib/zclient.h b/lib/zclient.h
index a082655..773db02 100644
--- a/lib/zclient.h
+++ b/lib/zclient.h
@@ -82,6 +82,7 @@ struct zclient
   int (*ipv4_route_delete) (int, struct zclient *, uint16_t);
   int (*ipv6_route_add) (int, struct zclient *, uint16_t);
   int (*ipv6_route_delete) (int, struct zclient *, uint16_t);
+  int (*nexthop_update) (int, struct zclient *, uint16_t);
 };
 
 /* Zebra API message flag. */
diff --git a/lib/zebra.h b/lib/zebra.h
index d4e6f19..4b8040e 100644
--- a/lib/zebra.h
+++ b/lib/zebra.h
@@ -425,7 +425,10 @@ struct in_pktinfo
 #define ZEBRA_ROUTER_ID_DELETE            21
 #define ZEBRA_ROUTER_ID_UPDATE            22
 #define ZEBRA_HELLO                       23
-#define ZEBRA_MESSAGE_MAX                 24
+#define ZEBRA_NEXTHOP_REGISTER            24
+#define ZEBRA_NEXTHOP_UNREGISTER          25
+#define ZEBRA_NEXTHOP_UPDATE              26
+#define ZEBRA_MESSAGE_MAX                 27
 
 /* Marker value used in new Zserv, in the byte location corresponding
  * the command value in the old zserv header. To allow old and new
@@ -528,6 +531,7 @@ extern const char *zserv_command_string (unsigned int command);
 #define CHECK_FLAG(V,F)      ((V) & (F))
 #define SET_FLAG(V,F)        (V) |= (F)
 #define UNSET_FLAG(V,F)      (V) &= ~(F)
+#define RESET_FLAG(V)        (V) = 0
 
 /* AFI and SAFI type. */
 typedef u_int16_t afi_t;
diff --git a/zebra/Makefile.am b/zebra/Makefile.am
index 22d891a..50aec74 100644
--- a/zebra/Makefile.am
+++ b/zebra/Makefile.am
@@ -34,16 +34,17 @@ zebra_SOURCES = \
 	zserv.c main.c interface.c connected.c zebra_rib.c zebra_routemap.c \
 	redistribute.c debug.c rtadv.c zebra_snmp.c zebra_vty.c \
 	irdp_main.c irdp_interface.c irdp_packet.c router-id.c zebra_fpm.c \
-	zebra_ptm.c $(othersrc)
+	zebra_ptm.c zebra_rnh.c $(othersrc)
 
 testzebra_SOURCES = test_main.c zebra_rib.c interface.c connected.c debug.c \
 	zebra_vty.c zebra_ptm.c \
-	kernel_null.c  redistribute_null.c ioctl_null.c misc_null.c
+	kernel_null.c  redistribute_null.c ioctl_null.c misc_null.c \
+        zebra_rnh_null.c
 
 noinst_HEADERS = \
 	connected.h ioctl.h rib.h rt.h zserv.h redistribute.h debug.h rtadv.h \
 	interface.h ipforward.h irdp.h router-id.h kernel_socket.h \
-	rt_netlink.h zebra_fpm.h zebra_fpm_private.h
+	rt_netlink.h zebra_fpm.h zebra_fpm_private.h zebra_rnh.h
 
 zebra_LDADD = $(otherobj) ../lib/libzebra.la $(LIBCAP) $(LIB_IPV6)
 
diff --git a/zebra/debug.c b/zebra/debug.c
index 7bfdb77..b37cf3f 100644
--- a/zebra/debug.c
+++ b/zebra/debug.c
@@ -30,6 +30,7 @@ unsigned long zebra_debug_packet;
 unsigned long zebra_debug_kernel;
 unsigned long zebra_debug_rib;
 unsigned long zebra_debug_fpm;
+unsigned long zebra_debug_nht;
 
 DEFUN (show_debugging_zebra,
        show_debugging_zebra_cmd,
@@ -74,6 +75,8 @@ DEFUN (show_debugging_zebra,
 
   if (IS_ZEBRA_DEBUG_FPM)
     vty_out (vty, "  Zebra FPM debugging is on%s", VTY_NEWLINE);
+  if (IS_ZEBRA_DEBUG_NHT)
+    vty_out (vty, "  Zebra next-hop tracking debugging is on%s", VTY_NEWLINE);
 
   return CMD_SUCCESS;
 }
@@ -89,6 +92,17 @@ DEFUN (debug_zebra_events,
   return CMD_WARNING;
 }
 
+DEFUN (debug_zebra_nht,
+       debug_zebra_nht_cmd,
+       "debug zebra nht",
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra next hop tracking\n")
+{
+  zebra_debug_nht = ZEBRA_DEBUG_NHT;	/* turn next-hop tracking debug on */
+  return CMD_SUCCESS;	/* setting the flag cannot fail; matches the "no" form */
+}
+
 DEFUN (debug_zebra_packet,
        debug_zebra_packet_cmd,
        "debug zebra packet",
@@ -196,6 +210,18 @@ DEFUN (no_debug_zebra_events,
   return CMD_SUCCESS;
 }
 
+DEFUN (no_debug_zebra_nht,
+       no_debug_zebra_nht_cmd,
+       "no debug zebra nht",
+       NO_STR
+       DEBUG_STR
+       "Zebra configuration\n"
+       "Debug option set for zebra next hop tracking\n")
+{
+  zebra_debug_nht = 0;	/* clears every bit, so IS_ZEBRA_DEBUG_NHT becomes false */
+  return CMD_SUCCESS;
+}
+
 DEFUN (no_debug_zebra_packet,
        no_debug_zebra_packet_cmd,
        "no debug zebra packet",
@@ -352,6 +378,7 @@ zebra_debug_init (void)
 
   install_element (ENABLE_NODE, &show_debugging_zebra_cmd);
   install_element (ENABLE_NODE, &debug_zebra_events_cmd);
+  install_element (ENABLE_NODE, &debug_zebra_nht_cmd);
   install_element (ENABLE_NODE, &debug_zebra_packet_cmd);
   install_element (ENABLE_NODE, &debug_zebra_packet_direct_cmd);
   install_element (ENABLE_NODE, &debug_zebra_packet_detail_cmd);
@@ -360,6 +387,7 @@ zebra_debug_init (void)
   install_element (ENABLE_NODE, &debug_zebra_rib_q_cmd);
   install_element (ENABLE_NODE, &debug_zebra_fpm_cmd);
   install_element (ENABLE_NODE, &no_debug_zebra_events_cmd);
+  install_element (ENABLE_NODE, &no_debug_zebra_nht_cmd);
   install_element (ENABLE_NODE, &no_debug_zebra_packet_cmd);
   install_element (ENABLE_NODE, &no_debug_zebra_kernel_cmd);
   install_element (ENABLE_NODE, &no_debug_zebra_rib_cmd);
@@ -367,6 +395,7 @@ zebra_debug_init (void)
   install_element (ENABLE_NODE, &no_debug_zebra_fpm_cmd);
 
   install_element (CONFIG_NODE, &debug_zebra_events_cmd);
+  install_element (CONFIG_NODE, &debug_zebra_nht_cmd);
   install_element (CONFIG_NODE, &debug_zebra_packet_cmd);
   install_element (CONFIG_NODE, &debug_zebra_packet_direct_cmd);
   install_element (CONFIG_NODE, &debug_zebra_packet_detail_cmd);
@@ -375,6 +404,7 @@ zebra_debug_init (void)
   install_element (CONFIG_NODE, &debug_zebra_rib_q_cmd);
   install_element (CONFIG_NODE, &debug_zebra_fpm_cmd);
   install_element (CONFIG_NODE, &no_debug_zebra_events_cmd);
+  install_element (CONFIG_NODE, &no_debug_zebra_nht_cmd);
   install_element (CONFIG_NODE, &no_debug_zebra_packet_cmd);
   install_element (CONFIG_NODE, &no_debug_zebra_kernel_cmd);
   install_element (CONFIG_NODE, &no_debug_zebra_rib_cmd);
diff --git a/zebra/debug.h b/zebra/debug.h
index d9231a2..0fb4dd9 100644
--- a/zebra/debug.h
+++ b/zebra/debug.h
@@ -37,6 +37,7 @@
 #define ZEBRA_DEBUG_RIB_Q   0x02
 
 #define ZEBRA_DEBUG_FPM     0x01
+#define ZEBRA_DEBUG_NHT     0x01
 
 /* Debug related macro. */
 #define IS_ZEBRA_DEBUG_EVENT  (zebra_debug_event & ZEBRA_DEBUG_EVENT)
@@ -52,12 +53,14 @@
 #define IS_ZEBRA_DEBUG_RIB_Q  (zebra_debug_rib & ZEBRA_DEBUG_RIB_Q)
 
 #define IS_ZEBRA_DEBUG_FPM (zebra_debug_fpm & ZEBRA_DEBUG_FPM)
+#define IS_ZEBRA_DEBUG_NHT  (zebra_debug_nht & ZEBRA_DEBUG_NHT)
 
 extern unsigned long zebra_debug_event;
 extern unsigned long zebra_debug_packet;
 extern unsigned long zebra_debug_kernel;
 extern unsigned long zebra_debug_rib;
 extern unsigned long zebra_debug_fpm;
+extern unsigned long zebra_debug_nht;
 
 extern void zebra_debug_init (void);
 
diff --git a/zebra/rib.h b/zebra/rib.h
index b6a4ac2..04ab127 100644
--- a/zebra/rib.h
+++ b/zebra/rib.h
@@ -31,13 +31,6 @@
 
 /* Routing information base. */
 
-union g_addr {
-  struct in_addr ipv4;
-#ifdef HAVE_IPV6
-  struct in6_addr ipv6;
-#endif /* HAVE_IPV6 */
-};
-
 struct rib
 {
   /* Link list. */
@@ -224,50 +217,6 @@ struct static_ipv6
 };
 #endif /* HAVE_IPV6 */
 
-enum nexthop_types_t
-{
-  NEXTHOP_TYPE_IFINDEX = 1,      /* Directly connected.  */
-  NEXTHOP_TYPE_IFNAME,           /* Interface route.  */
-  NEXTHOP_TYPE_IPV4,             /* IPv4 nexthop.  */
-  NEXTHOP_TYPE_IPV4_IFINDEX,     /* IPv4 nexthop with ifindex.  */
-  NEXTHOP_TYPE_IPV4_IFNAME,      /* IPv4 nexthop with ifname.  */
-  NEXTHOP_TYPE_IPV6,             /* IPv6 nexthop.  */
-  NEXTHOP_TYPE_IPV6_IFINDEX,     /* IPv6 nexthop with ifindex.  */
-  NEXTHOP_TYPE_IPV6_IFNAME,      /* IPv6 nexthop with ifname.  */
-  NEXTHOP_TYPE_BLACKHOLE,        /* Null0 nexthop.  */
-  NEXTHOP_TYPE_IPV4_IFINDEX_OL,
-  /* IPv4 nexthop with ifindex, which does not require the gate address
-   * to belong to a connected network of the given interface. Used for
-   * mesh routing. */
-};
-
-/* Nexthop structure. */
-struct nexthop
-{
-  struct nexthop *next;
-  struct nexthop *prev;
-
-  /* Interface index. */
-  char *ifname;
-  unsigned int ifindex;
-  
-  enum nexthop_types_t type;
-
-  u_char flags;
-#define NEXTHOP_FLAG_ACTIVE     (1 << 0) /* This nexthop is alive. */
-#define NEXTHOP_FLAG_FIB        (1 << 1) /* FIB nexthop. */
-#define NEXTHOP_FLAG_RECURSIVE  (1 << 2) /* Recursive nexthop. */
-
-  /* Nexthop address or interface name. */
-  union g_addr gate;
-
-  /* Recursive lookup nexthop. */
-  u_char rtype;
-  unsigned int rifindex;
-  union g_addr rgate;
-  union g_addr src;
-};
-
 /* Routing table instance.  */
 struct vrf
 {
@@ -288,6 +237,9 @@ struct vrf
 
   /* Static route configuration.  */
   struct route_table *stable[AFI_MAX][SAFI_MAX];
+
+  /* Recursive Nexthop table */
+  struct route_table *rnh_table[AFI_MAX];
 };
 
 /*
@@ -327,7 +279,6 @@ typedef struct rib_tables_iter_t_
   rib_tables_iter_state_t state;
 } rib_tables_iter_t;
 
-extern const char *nexthop_type_to_str (enum nexthop_types_t nh_type);
 extern struct nexthop *nexthop_ifindex_add (struct rib *, unsigned int);
 extern struct nexthop *nexthop_ifname_add (struct rib *, char *);
 extern struct nexthop *nexthop_blackhole_add (struct rib *);
@@ -428,6 +379,7 @@ static_delete_ipv6 (struct prefix *p, u_char type, struct in6_addr *gate,
 
 extern int rib_gc_dest (struct route_node *rn);
 extern struct route_table *rib_tables_iter_next (rib_tables_iter_t *iter);
+extern void nexthop_add(struct rib *rib, struct nexthop *nh);
 
 /*
  * Inline functions.
diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c
index 95fb217..4b20241 100644
--- a/zebra/rt_netlink.c
+++ b/zebra/rt_netlink.c
@@ -36,6 +36,7 @@
 #include "rib.h"
 #include "thread.h"
 #include "privs.h"
+#include "nexthop.h"
 
 #include "zebra/zserv.h"
 #include "zebra/rt.h"
diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c
index 67bcf0a..ebf042f 100644
--- a/zebra/zebra_fpm_netlink.c
+++ b/zebra/zebra_fpm_netlink.c
@@ -29,6 +29,7 @@
 #include "rib.h"
 
 #include "rt_netlink.h"
+#include "nexthop.h"
 
 #include "zebra_fpm_private.h"
 
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 2ea4d61..672e1a3 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -34,6 +34,7 @@
 #include "workqueue.h"
 #include "prefix.h"
 #include "routemap.h"
+#include "nexthop.h"
 
 #include "zebra/rib.h"
 #include "zebra/rt.h"
@@ -41,6 +42,7 @@
 #include "zebra/redistribute.h"
 #include "zebra/debug.h"
 #include "zebra/zebra_fpm.h"
+#include "zebra/zebra_rnh.h"
 
 /* Default rtm_table for all clients */
 extern struct zebra_t zebrad;
@@ -118,6 +120,8 @@ vrf_alloc (const char *name)
   vrf->stable[AFI_IP][SAFI_MULTICAST] = route_table_init ();
   vrf->stable[AFI_IP6][SAFI_MULTICAST] = route_table_init ();
 
+  vrf->rnh_table[AFI_IP] = route_table_init();
+  vrf->rnh_table[AFI_IP6] = route_table_init();
 
   return vrf;
 }
@@ -177,33 +181,8 @@ vrf_static_table (afi_t afi, safi_t safi, u_int32_t id)
   return vrf->stable[afi][safi];
 }
 
-/*
- * nexthop_type_to_str
- */
-const char *
-nexthop_type_to_str (enum nexthop_types_t nh_type)
-{
-  static const char *desc[] = {
-    "none",
-    "Directly connected",
-    "Interface route",
-    "IPv4 nexthop",
-    "IPv4 nexthop with ifindex",
-    "IPv4 nexthop with ifname",
-    "IPv6 nexthop",
-    "IPv6 nexthop with ifindex",
-    "IPv6 nexthop with ifname",
-    "Null0 nexthop",
-  };
-
-  if (nh_type >= ZEBRA_NUM_OF (desc))
-    return "<Invalid nh type>";
-
-  return desc[nh_type];
-}
-
 /* Add nexthop to the end of the list.  */
-static void
+void
 nexthop_add (struct rib *rib, struct nexthop *nexthop)
 {
   struct nexthop *last;
@@ -232,15 +211,6 @@ nexthop_delete (struct rib *rib, struct nexthop *nexthop)
   rib->nexthop_num--;
 }
 
-/* Free nexthop. */
-static void
-nexthop_free (struct nexthop *nexthop)
-{
-  if (nexthop->ifname)
-    XFREE (0, nexthop->ifname);
-  XFREE (MTYPE_NEXTHOP, nexthop);
-}
-
 struct nexthop *
 nexthop_ifindex_add (struct rib *rib, unsigned int ifindex)
 {
@@ -1374,6 +1344,11 @@ meta_queue_process (struct work_queue *dummy, void *data)
 	mq->size--;
 	break;
       }
+
+  zebra_evaluate_rnh_table(0, AF_INET);
+#ifdef HAVE_IPV6
+  zebra_evaluate_rnh_table(0, AF_INET6);
+#endif /* HAVE_IPV6 */
   return mq->size ? WQ_REQUEUE : WQ_SUCCESS;
 }
 
diff --git a/zebra/zebra_rnh.c b/zebra/zebra_rnh.c
new file mode 100644
index 0000000..3815567
--- /dev/null
+++ b/zebra/zebra_rnh.c
@@ -0,0 +1,606 @@
+/* Zebra next hop tracking code
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "prefix.h"
+#include "table.h"
+#include "memory.h"
+#include "str.h"
+#include "command.h"
+#include "if.h"
+#include "log.h"
+#include "sockunion.h"
+#include "linklist.h"
+#include "thread.h"
+#include "workqueue.h"
+#include "prefix.h"
+#include "routemap.h"
+#include "stream.h"
+#include "nexthop.h"
+
+#include "zebra/rib.h"
+#include "zebra/rt.h"
+#include "zebra/zserv.h"
+#include "zebra/redistribute.h"
+#include "zebra/debug.h"
+#include "zebra/zebra_rnh.h"
+
+#define lookup_rnh_table(v, f)		         \
+({						 \
+  struct vrf *vrf;                               \
+  struct route_table *t = NULL;                  \
+  vrf = vrf_lookup(v);                           \
+  if (vrf)                                       \
+    t = vrf->rnh_table[family2afi(f)];	         \
+  t;                                             \
+}) /* yields vrf V's rnh table for address family F; NULL if the vrf is unknown */
+
+static void free_state(struct rib *rib);
+static void copy_state(struct rnh *rnh, struct rib *rib);
+static int compare_state(struct rib *r1, struct rib *r2);
+static int send_client(struct rnh *rnh, struct zserv *client);
+static void print_rnh(struct route_node *rn, struct vty *vty);
+
+char *
+rnh_str (struct rnh *rnh, char *buf, int size)
+{
+  prefix2str(&(rnh->node->p), buf, size);	/* render the tracked prefix into buf */
+  return buf;	/* returned so the call can be used directly as a log argument */
+}
+
+struct rnh *
+zebra_add_rnh (struct prefix *p, u_int32_t vrfid)
+{
+  struct route_table *table;
+  struct route_node *rn;
+  struct rnh *rnh = NULL;
+  /* Find the rnh tracking P in vrf VRFID, creating it on first use. */
+  if (IS_ZEBRA_DEBUG_NHT)
+    {
+      char buf[INET6_ADDRSTRLEN];
+      prefix2str(p, buf, INET6_ADDRSTRLEN);
+      zlog_debug("add rnh %s in vrf %d", buf, vrfid);
+    }
+  table = lookup_rnh_table(vrfid, PREFIX_FAMILY(p));
+  if (!table)
+    {
+      zlog_debug("add_rnh: rnh table not found\n");
+      return NULL;	/* callers have to cope with a NULL result */
+    }
+
+  /* Make sure the prefix length is applied to P (P is modified in place). */
+  apply_mask (p);
+
+  /* Lookup (or add) route node. */
+  rn = route_node_get (table, p);
+
+  if (!rn->info)
+    {
+      rnh = XCALLOC(MTYPE_RNH, sizeof(struct rnh));
+      rnh->client_list = list_new();
+      route_lock_node (rn);	/* reference kept for the rnh; zebra_delete_rnh() unlocks */
+      rn->info = rnh;
+      rnh->node = rn;
+    }
+
+  route_unlock_node (rn);	/* presumably balances the reference route_node_get() took */
+  return (rn->info);
+}
+
+struct rnh *
+zebra_lookup_rnh (struct prefix *p, u_int32_t vrfid)
+{
+  struct route_table *table;
+  struct route_node *rn;
+  /* Return the existing rnh for P in vrf VRFID, or NULL; never creates one. */
+  table = lookup_rnh_table(vrfid, PREFIX_FAMILY(p));
+  if (!table)
+    return NULL;
+
+  /* Make sure the prefix length is applied to P (P is modified in place). */
+  apply_mask (p);
+
+  /* Lookup route node. */
+  rn = route_node_lookup (table, p);
+  if (!rn)
+    return NULL;
+
+  route_unlock_node (rn);	/* presumably balances the reference the lookup took */
+  return (rn->info);
+}
+
+/* Destroy RNH (NULL or detached: no-op): free clients, state, table ref. */
+void
+zebra_delete_rnh (struct rnh *rnh)
+{
+  struct route_node *rn;
+
+  if (!rnh || !(rn = rnh->node))
+    return;
+
+  if (IS_ZEBRA_DEBUG_NHT)
+    {
+      char buf[INET6_ADDRSTRLEN];
+      zlog_debug("delete rnh %s", rnh_str(rnh, buf, INET6_ADDRSTRLEN));
+    }
+  /* list_delete(), unlike list_free(), also frees any remaining nodes. */
+  list_delete(rnh->client_list);
+  free_state(rnh->state);
+  XFREE(MTYPE_RNH, rn->info);	/* rn->info is this rnh (set in zebra_add_rnh) */
+  rn->info = NULL;
+  route_unlock_node (rn);	/* drop the reference taken when the rnh was created */
+}
+
+void
+zebra_add_rnh_client (struct rnh *rnh, struct zserv *client)
+{
+  if (IS_ZEBRA_DEBUG_NHT)
+    {
+      char buf[INET6_ADDRSTRLEN];
+      zlog_debug("client %s registers rnh %s",
+		 zebra_route_string(client->proto),
+		 rnh_str(rnh, buf, INET6_ADDRSTRLEN));
+    }
+  if (!listnode_lookup(rnh->client_list, client))	/* register a client only once */
+    {
+      listnode_add(rnh->client_list, client);
+      send_client(rnh, client);	/* a new subscriber gets the cached state right away */
+    }
+}
+
+void
+zebra_remove_rnh_client (struct rnh *rnh, struct zserv *client)
+{
+  if (IS_ZEBRA_DEBUG_NHT)
+    {
+      char buf[INET6_ADDRSTRLEN];
+      zlog_debug("client %s unregisters rnh %s",
+		 zebra_route_string(client->proto),
+		 rnh_str(rnh, buf, INET6_ADDRSTRLEN));
+    }
+  listnode_delete(rnh->client_list, client);
+  if (list_isempty(rnh->client_list))	/* last subscriber gone: drop the entry */
+    zebra_delete_rnh(rnh);		/* frees rnh; it must not be used afterwards */
+}
+
+/* Re-resolve each tracked nexthop of FAMILY in VRFID against the unicast RIB
+ * and notify its clients on change.  Returns 1, or -1 if a table is missing. */
+int
+zebra_evaluate_rnh_table (int vrfid, int family)
+{
+  struct route_table *ptable, *ntable;
+  struct route_node *prn, *nrn;
+  struct rnh *rnh;
+  struct zserv *client;
+  struct listnode *node;
+  struct rib *rib;
+
+  ntable = lookup_rnh_table(vrfid, family);
+  if (!ntable)
+    {
+      zlog_debug("evaluate_rnh_table: rnh table not found\n");
+      return -1;
+    }
+
+  ptable = vrf_table(family2afi(family), SAFI_UNICAST, vrfid);
+  if (!ptable)
+    {
+      zlog_debug("evaluate_rnh_table: prefix table not found\n");
+      return -1;
+    }
+
+  for (nrn = route_top (ntable); nrn; nrn = route_next (nrn))
+    {
+      if (!nrn->info)
+	  continue;
+      rib = NULL;	/* stays NULL when nothing resolves the nexthop */
+      prn = route_node_match(ptable, &nrn->p);	/* comes back locked */
+      if (prn)
+	{
+	  RNODE_FOREACH_RIB(prn, rib)
+	    {
+	      if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
+		continue;
+	      if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
+		break;
+	    }
+	}
+      rnh = nrn->info;
+      if (compare_state(rib, rnh->state))
+	{
+	  if (IS_ZEBRA_DEBUG_NHT)
+	    {
+	      char bufn[INET6_ADDRSTRLEN];
+	      char bufp[INET6_ADDRSTRLEN];
+	      prefix2str(&nrn->p, bufn, INET6_ADDRSTRLEN);
+	      if (prn)
+		prefix2str(&prn->p, bufp, INET6_ADDRSTRLEN);
+	      else
+		strcpy(bufp, "null");
+	      zlog_debug("rnh %s resolved through route %s - sending "
+			 "nexthop %s event to clients", bufn, bufp,
+			 rib ? "reachable" : "unreachable");
+	    }
+	  copy_state(rnh, rib);
+	  for (ALL_LIST_ELEMENTS_RO(rnh->client_list, node, client))
+	    send_client(rnh, client);
+	}
+      /* Release the reference route_node_match() took; it used to leak. */
+      if (prn)
+	route_unlock_node (prn);
+    }
+  return 1;
+}
+
+int
+zebra_dispatch_rnh_table (int vrfid, int family, struct zserv *client)
+{
+  struct route_table *ntable;
+  struct route_node *nrn;
+  struct rnh *rnh;
+  /* Send CLIENT the cached state of every rnh of FAMILY in vrf VRFID. */
+  ntable = lookup_rnh_table(vrfid, family);
+  if (!ntable)
+    {
+      zlog_debug("dispatch_rnh_table: rnh table not found\n");
+      return -1;
+    }
+
+  for (nrn = route_top (ntable); nrn; nrn = route_next (nrn))
+    {
+      if (!nrn->info)
+	  continue;	/* node carries no rnh */
+
+      rnh = nrn->info;
+      if (IS_ZEBRA_DEBUG_NHT)
+	{
+	  char bufn[INET6_ADDRSTRLEN];
+	  prefix2str(&nrn->p, bufn, INET6_ADDRSTRLEN);
+	  zlog_debug("rnh %s - sending nexthop %s event to client %s", bufn,
+		     rnh->state ? "reachable" : "unreachable",
+		     zebra_route_string(client->proto));
+	}
+      send_client(rnh, client);	/* sent whether or not CLIENT registered for this rnh */
+    }
+  return 1;
+}
+
+void
+zebra_print_rnh_table (int vrfid, int af, struct vty *vty)
+{
+  struct route_table *table;
+  struct route_node *rn;
+  /* Print every rnh of address family AF in vrf VRFID on VTY. */
+  table = lookup_rnh_table(vrfid, af);
+  if (!table)
+    {
+      zlog_debug("print_rnhs: rnh table not found\n");
+      return;
+    }
+
+  for (rn = route_top(table); rn; rn = route_next(rn))
+      if (rn->info)	/* skip nodes that carry no rnh */
+	print_rnh(rn, vty);
+}
+
+int
+zebra_cleanup_rnh_client (int vrfid, int family, struct zserv *client)
+{
+  struct route_table *ntable;
+  struct route_node *nrn;
+  struct rnh *rnh;
+  /* Remove CLIENT from every rnh of FAMILY in vrf VRFID. */
+  ntable = lookup_rnh_table(vrfid, family);
+  if (!ntable)
+    {
+      zlog_debug("cleanup_rnh_client: rnh table not found\n");
+      return -1;
+    }
+
+  for (nrn = route_top (ntable); nrn; nrn = route_next (nrn))
+    {
+      if (!nrn->info)
+	  continue;	/* node carries no rnh */
+
+      rnh = nrn->info;
+      if (IS_ZEBRA_DEBUG_NHT)
+	{
+	  char bufn[INET6_ADDRSTRLEN];
+	  prefix2str(&nrn->p, bufn, INET6_ADDRSTRLEN);
+	  zlog_debug("rnh %s - cleaning state for client %s", bufn,
+		     zebra_route_string(client->proto));
+	}
+      zebra_remove_rnh_client(rnh, client);	/* may free rnh when CLIENT was its last user */
+    }
+  return 1;
+}
+
+/**
+ * free_state - free a cached rib copy and all its nexthops; NULL is a no-op.
+ */
+static void
+free_state (struct rib *rib)
+{
+  struct nexthop *nexthop, *next;
+
+  if (!rib)
+    return;
+
+  /* free RIB and nexthops */
+  for (nexthop = rib->nexthop; nexthop; nexthop = next)
+    {
+      next = nexthop->next;	/* saved first: nexthop is freed on the next line */
+      nexthop_free (nexthop);
+    }
+  XFREE (MTYPE_RIB, rib);
+}
+
+/**
+ * copy_nexthop - append a private copy of nexthop NH to the rib STATE.
+ */
+static void
+copy_nexthop (struct rib *state, struct nexthop *nh)
+{
+  struct nexthop *nexthop = nexthop_new();
+
+  nexthop->flags = nh->flags;
+  nexthop->type = nh->type;
+  nexthop->ifindex = nh->ifindex;
+  if (nh->ifname)	/* the name is optional; duplicate it only when present */
+    nexthop->ifname = XSTRDUP(0, nh->ifname);
+  memcpy(&(nexthop->gate), &(nh->gate), sizeof(union g_addr));
+  /* Recursive resolution: keep rtype together with rgate and rifindex. */
+  nexthop->rtype = nh->rtype;
+  memcpy(&(nexthop->rgate), &(nh->rgate), sizeof(union g_addr));
+  nexthop->rifindex = nh->rifindex;
+  memcpy(&(nexthop->src), &(nh->src), sizeof(union g_addr));
+  nexthop_add(state, nexthop);
+}
+
+static void
+copy_state (struct rnh *rnh, struct rib *rib)
+{
+  struct rib *state;
+  struct nexthop *nh;
+  /* Replace rnh->state with a private snapshot of RIB (none if RIB is NULL). */
+  if (rnh->state)
+    {
+      free_state(rnh->state);
+      rnh->state = NULL;
+    }
+
+  if (!rib)
+    return;	/* unresolved: rnh->state stays NULL */
+
+  state = XCALLOC (MTYPE_RIB, sizeof (struct rib));
+  state->type = rib->type;	/* only type, metric and the nexthops are kept */
+  state->metric = rib->metric;
+
+  for (nh = rib->nexthop; nh; nh = nh->next)
+    copy_nexthop(state, nh);
+  rnh->state = state;
+}
+
+static int
+compare_state (struct rib *r1, struct rib *r2)
+{
+  struct nexthop *nh1;
+  struct nexthop *nh2;
+  u_char found_nh = 0;
+  /* Returns 0 when r1 and r2 describe the same resolution, 1 when they differ. */
+  if (!r1 && !r2)
+    return 0;
+
+  if ((!r1 && r2) || (r1 && !r2))
+      return 1;
+
+  if (r1->metric != r2->metric)
+      return 1;
+
+  if (r1->nexthop_num != r2->nexthop_num)
+      return 1;
+
+  /* Verify that the nexthops of r1 match the nexthops of r2 in any order.
+   * A rib entry may hold the same nexthop more than once (example: [a,a]),
+   * so every r2 nexthop may be paired with at most one r1 nexthop.  A paired
+   * r2 nexthop is marked with NEXTHOP_FLAG_MATCHED; the flag is cleared on
+   * all r2 nexthops again before returning, on both exit paths.
+   *
+   * Examples ("same" returns 0, "differ" returns 1):
+   * same:   r1 [a,b], r2 [a,b]
+   * same:   r1 [a,b], r2 [b,a]
+   * differ: r1 [a,b], r2 [a,c]
+   * differ: r1 [a,a], r2 [a,b]
+   */
+  for (nh1 = r1->nexthop; nh1; nh1 = nh1->next)
+    {
+      found_nh = 0;
+      for (nh2 = r2->nexthop; nh2; nh2 = nh2->next)
+        {
+          if (CHECK_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED))
+            continue;	/* already paired with an earlier nh1 */
+
+          if (nexthop_same(nh1, nh2))
+            {
+              SET_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED);
+              found_nh = 1;
+              break;
+            }
+        }
+
+      if (!found_nh)
+        {
+          for (nh2 = r2->nexthop; nh2; nh2 = nh2->next)
+            if (CHECK_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED))
+              UNSET_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED);
+          return 1;	/* nh1 has no unpaired equal in r2 */
+        }
+    }
+
+  for (nh2 = r2->nexthop; nh2; nh2 = nh2->next)
+    if (CHECK_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED))
+      UNSET_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED);
+
+  return 0;
+}
+
+static int
+send_client (struct rnh *rnh, struct zserv *client)
+{
+  struct stream *s;
+  struct rib *rib;
+  unsigned long nump;
+  u_char num;
+  struct nexthop *nexthop;
+  struct route_node *rn;
+  /* Build a ZEBRA_NEXTHOP_UPDATE for RNH from its cached state, send to CLIENT. */
+  rn = rnh->node;
+  rib = rnh->state;
+
+  /* Get output stream. */
+  s = client->obuf;
+  stream_reset (s);
+
+  zserv_create_header (s, ZEBRA_NEXTHOP_UPDATE);
+
+  stream_putw(s, rn->p.family);
+  stream_put_prefix (s, &rn->p);
+
+  if (rib)
+    {
+      stream_putl (s, rib->metric);
+      num = 0;
+      nump = stream_get_endp(s);	/* position of the nexthop-count byte */
+      stream_putc (s, 0);		/* placeholder, overwritten below */
+      for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next)
+	if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))	/* only FIB nexthops are sent */
+	  {
+	    stream_putc (s, nexthop->type);
+	    switch (nexthop->type)
+	      {
+	      case ZEBRA_NEXTHOP_IPV4:
+		stream_put_in_addr (s, &nexthop->gate.ipv4);
+		break;
+	      case ZEBRA_NEXTHOP_IFINDEX:
+	      case ZEBRA_NEXTHOP_IFNAME:
+		stream_putl (s, nexthop->ifindex);
+		break;
+	      case ZEBRA_NEXTHOP_IPV4_IFINDEX:
+	      case ZEBRA_NEXTHOP_IPV4_IFNAME:
+		stream_put_in_addr (s, &nexthop->gate.ipv4);
+		stream_putl (s, nexthop->ifindex);
+		break;
+#ifdef HAVE_IPV6
+	      case ZEBRA_NEXTHOP_IPV6:
+		stream_put (s, &nexthop->gate.ipv6, 16);
+		break;
+	      case ZEBRA_NEXTHOP_IPV6_IFINDEX:
+	      case ZEBRA_NEXTHOP_IPV6_IFNAME:
+		stream_put (s, &nexthop->gate.ipv6, 16);
+		stream_putl (s, nexthop->ifindex);
+		break;
+#endif /* HAVE_IPV6 */
+	      default:
+                /* other types: only the type byte written above is sent */
+		break;
+	      }
+	    num++;
+	  }
+      stream_putc_at (s, nump, num);	/* fill in the real nexthop count */
+    }
+  else
+    {
+      stream_putl (s, 0);	/* no cached state: metric 0 ... */
+      stream_putc (s, 0);	/* ... and zero nexthops */
+    }
+  stream_putw_at (s, 0, stream_get_endp (s));	/* write the final length at offset 0 */
+  return zebra_server_send_message(client);
+}
+
+static void
+print_nh (struct nexthop *nexthop, struct vty *vty)
+{
+  char buf[BUFSIZ];
+
+  switch (nexthop->type)
+    {
+    case NEXTHOP_TYPE_IPV4:
+    case NEXTHOP_TYPE_IPV4_IFINDEX:
+    case NEXTHOP_TYPE_IPV4_IFINDEX_OL:
+      vty_out (vty, " via %s", inet_ntoa (nexthop->gate.ipv4));
+      if (nexthop->ifindex)
+	vty_out (vty, ", %s", ifindex2ifname (nexthop->ifindex));
+      break;
+    case NEXTHOP_TYPE_IPV6:
+    case NEXTHOP_TYPE_IPV6_IFINDEX:
+    case NEXTHOP_TYPE_IPV6_IFNAME:
+      vty_out (vty, " %s",
+	       inet_ntop (AF_INET6, &nexthop->gate.ipv6, buf, BUFSIZ));
+      if (nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME)
+	vty_out (vty, ", %s", nexthop->ifname);
+      else if (nexthop->ifindex)
+	vty_out (vty, ", via %s", ifindex2ifname (nexthop->ifindex));
+      break;
+    case NEXTHOP_TYPE_IFINDEX:
+      vty_out (vty, " is directly connected, %s",
+	       ifindex2ifname (nexthop->ifindex));
+      break;
+    case NEXTHOP_TYPE_IFNAME:
+      vty_out (vty, " is directly connected, %s", nexthop->ifname);
+      break;
+    case NEXTHOP_TYPE_BLACKHOLE:
+      vty_out (vty, " is directly connected, Null0");
+      break;
+    default:
+      break;
+    }
+  vty_out(vty, "%s", VTY_NEWLINE);
+}
+
+static void
+print_rnh (struct route_node *rn, struct vty *vty)
+{
+  struct rnh *rnh;
+  struct nexthop *nexthop;
+  struct listnode *node;
+  struct zserv *client;
+  char buf[BUFSIZ];
+  /* Print the rnh stored in RN: address, resolution state and client list. */
+  rnh = rn->info;
+  vty_out(vty, "%s%s", inet_ntop(rn->p.family, &rn->p.u.prefix, buf, BUFSIZ),
+	  VTY_NEWLINE);
+  if (rnh->state)
+    {
+      vty_out(vty, " resolved via %s%s",
+	      zebra_route_string(rnh->state->type), VTY_NEWLINE);
+      for (nexthop = rnh->state->nexthop; nexthop; nexthop = nexthop->next)
+	print_nh(nexthop, vty);
+    }
+  else
+    vty_out(vty, " unresolved%s", VTY_NEWLINE);
+
+  vty_out(vty, " Client list:");
+  for (ALL_LIST_ELEMENTS_RO(rnh->client_list, node, client))
+    vty_out(vty, " %s(fd %d)", zebra_route_string(client->proto),
+	    client->sock);
+  vty_out(vty, "%s", VTY_NEWLINE);
+}
diff --git a/zebra/zebra_rnh.h b/zebra/zebra_rnh.h
new file mode 100644
index 0000000..212bab6
--- /dev/null
+++ b/zebra/zebra_rnh.h
@@ -0,0 +1,48 @@
+/*
+ * Zebra next hop tracking header
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _ZEBRA_RNH_H
+#define _ZEBRA_RNH_H
+
+#include "prefix.h"
+#include "vty.h"
+
+/* Tracked nexthop: a prefix whose resolution clients asked to be told about. */
+struct rnh
+{
+  u_char flags;			/* NOTE(review): no user visible in zebra_rnh.c - confirm purpose */
+  struct rib *state;		/* snapshot of the resolving rib; NULL while unresolved */
+  struct list *client_list;	/* struct zserv clients registered for this prefix */
+  struct route_node *node;	/* node of the rnh table that holds this entry */
+};
+
+extern struct rnh *zebra_add_rnh(struct prefix *p, u_int32_t vrfid);
+extern struct rnh *zebra_lookup_rnh(struct prefix *p, u_int32_t vrfid);
+extern void zebra_delete_rnh(struct rnh *rnh);
+extern void zebra_add_rnh_client(struct rnh *rnh, struct zserv *client);
+extern void zebra_remove_rnh_client(struct rnh *rnh, struct zserv *client);
+extern int zebra_evaluate_rnh_table(int vrfid, int family);
+extern int zebra_dispatch_rnh_table(int vrfid, int family, struct zserv *cl);
+extern void zebra_print_rnh_table(int vrfid, int family, struct vty *vty);
+extern char *rnh_str(struct rnh *rnh, char *buf, int size);
+extern int zebra_cleanup_rnh_client(int vrf, int family, struct zserv *client);
+#endif /*_ZEBRA_RNH_H */
diff --git a/zebra/zebra_rnh_null.c b/zebra/zebra_rnh_null.c
new file mode 100644
index 0000000..68b58ce
--- /dev/null
+++ b/zebra/zebra_rnh_null.c
@@ -0,0 +1,10 @@
+#include <zebra.h>
+#include "zebra/rib.h"
+#include "zebra/zserv.h"
+#include "zebra/zebra_rnh.h"
+
+int zebra_evaluate_rnh_table (int vrfid, int family)
+{ return 0; }	/* stub linked into testzebra: nothing is tracked, always returns 0 */
+
+void zebra_print_rnh_table (int vrfid, int family, struct vty *vty)
+{}	/* stub linked into testzebra: prints nothing */
diff --git a/zebra/zebra_routemap.c b/zebra/zebra_routemap.c
index b3111b8..f8cad47 100644
--- a/zebra/zebra_routemap.c
+++ b/zebra/zebra_routemap.c
@@ -28,6 +28,7 @@
 #include "command.h"
 #include "filter.h"
 #include "plist.h"
+#include "nexthop.h"
 
 #include "zebra/zserv.h"
 
diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c
index 5f1fb2e..94cdd75 100644
--- a/zebra/zebra_vty.c
+++ b/zebra/zebra_vty.c
@@ -27,8 +27,10 @@
 #include "command.h"
 #include "table.h"
 #include "rib.h"
+#include "nexthop.h"
 
 #include "zebra/zserv.h"
+#include "zebra/zebra_rnh.h"
 
 /* General fucntion for static route. */
 static int
@@ -849,6 +851,28 @@ DEFUN (show_ip_route,
   return CMD_SUCCESS;
 }
 
+DEFUN (show_ip_nht,
+       show_ip_nht_cmd,
+       "show ip nht",
+       SHOW_STR
+       IP_STR
+       "IP nexthop tracking table\n")
+{
+  zebra_print_rnh_table(0, AF_INET, vty);	/* vrf id 0, IPv4 table */
+  return CMD_SUCCESS;
+}
+
+DEFUN (show_ipv6_nht,
+       show_ipv6_nht_cmd,
+       "show ipv6 nht",
+       SHOW_STR
+       IPV6_STR
+       "IPv6 nexthop tracking table\n")
+{
+  zebra_print_rnh_table(0, AF_INET6, vty);	/* vrf id 0, IPv6 table */
+  return CMD_SUCCESS;
+}
+
 DEFUN (show_ip_route_prefix_longer,
        show_ip_route_prefix_longer_cmd,
        "show ip route A.B.C.D/M longer-prefixes",
@@ -2283,6 +2307,8 @@ zebra_vty_init (void)
   install_element (CONFIG_NODE, &no_ip_route_mask_flags_distance2_cmd);
 
   install_element (VIEW_NODE, &show_ip_route_cmd);
+  install_element (VIEW_NODE, &show_ip_nht_cmd);
+  install_element (VIEW_NODE, &show_ipv6_nht_cmd);
   install_element (VIEW_NODE, &show_ip_route_addr_cmd);
   install_element (VIEW_NODE, &show_ip_route_prefix_cmd);
   install_element (VIEW_NODE, &show_ip_route_prefix_longer_cmd);
@@ -2291,6 +2317,8 @@ zebra_vty_init (void)
   install_element (VIEW_NODE, &show_ip_route_summary_cmd);
   install_element (VIEW_NODE, &show_ip_route_summary_prefix_cmd);
   install_element (ENABLE_NODE, &show_ip_route_cmd);
+  install_element (ENABLE_NODE, &show_ip_nht_cmd);
+  install_element (ENABLE_NODE, &show_ipv6_nht_cmd);
   install_element (ENABLE_NODE, &show_ip_route_addr_cmd);
   install_element (ENABLE_NODE, &show_ip_route_prefix_cmd);
   install_element (ENABLE_NODE, &show_ip_route_prefix_longer_cmd);
diff --git a/zebra/zserv.c b/zebra/zserv.c
index 330871a..fbdddf4 100644
--- a/zebra/zserv.c
+++ b/zebra/zserv.c
@@ -36,12 +36,14 @@
 #include "privs.h"
 #include "network.h"
 #include "buffer.h"
+#include "nexthop.h"
 
 #include "zebra/zserv.h"
 #include "zebra/router-id.h"
 #include "zebra/redistribute.h"
 #include "zebra/debug.h"
 #include "zebra/ipforward.h"
+#include "zebra/zebra_rnh.h"
 
 /* Event list of zebra. */
 enum event { ZEBRA_SERV, ZEBRA_READ, ZEBRA_WRITE };
@@ -101,7 +103,7 @@ zserv_flush_data(struct thread *thread)
   return 0;
 }
 
-static int
+int
 zebra_server_send_message(struct zserv *client)
 {
   if (client->t_suicide)
@@ -130,7 +132,7 @@ zebra_server_send_message(struct zserv *client)
   return 0;
 }
 
-static void
+void
 zserv_create_header (struct stream *s, uint16_t cmd)
 {
   /* length placeholder, caller can update */
@@ -594,6 +596,65 @@ zsend_ipv4_nexthop_lookup (struct zserv *client, struct in_addr addr)
   return zebra_server_send_message(client);
 }
 
+/* Nexthop register: each record is family (2 bytes), prefixlen (1), prefix. */
+static int
+zserv_nexthop_register (struct zserv *client, int sock, u_short length)
+{
+  struct rnh *rnh;
+  struct stream *s = client->ibuf;
+  struct prefix p;
+  u_short l = 0;
+
+  if (IS_ZEBRA_DEBUG_NHT)
+    zlog_debug("nexthop_register msg from client %s: length=%d\n",
+	       zebra_route_string(client->proto), length);
+
+  while (l < length)
+    {
+      memset(&p, 0, sizeof p);	/* no stale bytes beyond the copied prefix */
+      p.family = stream_getw(s);
+      p.prefixlen = stream_getc(s);
+      if (PSIZE(p.prefixlen) > sizeof p.u)
+	break;	/* malformed record: prefix bytes would overrun p.u */
+      stream_get(&p.u.prefix, s, PSIZE(p.prefixlen));
+      l += 3 + PSIZE(p.prefixlen);	/* 3 = family + prefixlen header */
+      rnh = zebra_add_rnh(&p, 0);
+      zebra_add_rnh_client(rnh, client);
+    }
+  zebra_evaluate_rnh_table(0, AF_INET);	/* evaluated once per message, */
+  zebra_evaluate_rnh_table(0, AF_INET6);	/* for both address families */
+  return 0;
+}
+
+/* Nexthop unregister: same record layout as the register message. */
+static int
+zserv_nexthop_unregister (struct zserv *client, int sock, u_short length)
+{
+  struct rnh *rnh;
+  struct stream *s = client->ibuf;
+  struct prefix p;
+  u_short l = 0;
+
+  if (IS_ZEBRA_DEBUG_NHT)
+    zlog_debug("nexthop_unregister msg from client %s: length=%d\n",
+	       zebra_route_string(client->proto), length);
+
+  while (l < length)
+    {
+      memset(&p, 0, sizeof p);	/* no stale bytes beyond the copied prefix */
+      p.family = stream_getw(s);
+      p.prefixlen = stream_getc(s);
+      if (PSIZE(p.prefixlen) > sizeof p.u)
+	break;	/* malformed record: prefix bytes would overrun p.u */
+      stream_get(&p.u.prefix, s, PSIZE(p.prefixlen));
+      l += 3 + PSIZE(p.prefixlen);	/* 3 = family + prefixlen header */
+      rnh = zebra_lookup_rnh(&p, 0);
+      if (rnh)	/* prefixes with no rnh entry are skipped */
+	zebra_remove_rnh_client(rnh, client);
+    }
+  return 0;
+}
+
 static int
 zsend_ipv4_import_lookup (struct zserv *client, struct prefix_ipv4 *p)
 {
@@ -1180,6 +1241,7 @@ zread_hello (struct zserv *client)
                     client->sock);
 
       route_type_oaths[proto] = client->sock;
+      client->proto = proto;
     }
 }
 
@@ -1205,6 +1267,9 @@ zebra_score_rib (int client_sock)
 static void
 zebra_client_close (struct zserv *client)
 {
+  zebra_cleanup_rnh_client(0, AF_INET, client);
+  zebra_cleanup_rnh_client(0, AF_INET6, client);
+
   /* Close file descriptor. */
   if (client->sock)
     {
@@ -1417,6 +1482,12 @@ zebra_client_read (struct thread *thread)
     case ZEBRA_HELLO:
       zread_hello (client);
       break;
+    case ZEBRA_NEXTHOP_REGISTER:
+      zserv_nexthop_register(client, sock, length);
+      break;
+    case ZEBRA_NEXTHOP_UNREGISTER:
+      zserv_nexthop_unregister(client, sock, length);
+      break;
     default:
       zlog_info ("Zebra received unknown command %d", command);
       break;
@@ -1690,8 +1761,10 @@ DEFUN (show_zebra_client,
   struct zserv *client;
 
   for (ALL_LIST_ELEMENTS_RO (zebrad.client_list, node, client))
-    vty_out (vty, "Client fd %d%s", client->sock, VTY_NEWLINE);
-  
+    vty_out (vty, "Client %s fd %d%s",
+	     zebra_route_string(client->proto), client->sock,
+	     VTY_NEWLINE);
+
   return CMD_SUCCESS;
 }
 
diff --git a/zebra/zserv.h b/zebra/zserv.h
index 5e8bcca..92b8ba3 100644
--- a/zebra/zserv.h
+++ b/zebra/zserv.h
@@ -66,6 +66,9 @@ struct zserv
 
   /* Router-id information. */
   u_char ridinfo;
+
+  /* client's protocol */
+  u_char proto;
 };
 
 /* Zebra instance */
@@ -110,4 +113,7 @@ extern int zsend_router_id_update(struct zserv *, struct prefix *);
 
 extern pid_t pid;
 
+extern void zserv_create_header(struct stream *s, uint16_t cmd);
+extern int zebra_server_send_message(struct zserv *client);
+
 #endif /* _ZEBRA_ZEBRA_H */


_______________________________________________
Quagga-dev mailing list
Quagga-dev@lists.quagga.net
http://lists.quagga.net/mailman/listinfo/quagga-dev
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic