[prev in list] [next in list] [prev in thread] [next in thread]
List: djabberd
Subject: JID case sensitivity, again
From: Alex Vandiver <alexmv () bestpractical ! com>
Date: 2009-09-25 18:28:00
Message-ID: 1253903280.16254.14.camel () localhost
[Download RAW message or body]
Heya,
I was cleaning up some local repositories the other day, and noticed
that the JID case sensitivity patches have yet to be applied. I'd love
to not have to keep a local fork of djabberd[1] in order to fix this
rather easily fixable but fundamentally important issue. As we've seen
on the lists, this is not purely a theoretical problem -- people are
actually being bitten by it.
The only reason I've heard for not applying them was fears about
backwards incompatability. With the attached patches, users' options
are:
* Add 'CaseSensitive on', and nothing is changed.
* Run 'fix-jabber-roster-case' to fix up SQLite rosters.
* If you have non-SQLite rosters, write a tool to do the case-folding
and merging for your own storage format.
I can't see any way to make this easier than the above, and I really do
want to see this bug put to rest.
- Alex
[1] http://github.com/bestpractical/djabberd
["0001-Do-stringprep-as-specified-by-the-RFC-on-JID-object-.patch" (0001-Do-stringprep-as-specified-by-the-RFC-on-JID-object-.patch)]
>From 7234cc3410b9e72dec7e0ff1763aadfc260b0144 Mon Sep 17 00:00:00 2001
From: Alex Vandiver <alexmv@bestpractical.com>
Date: Thu, 26 Mar 2009 21:24:28 -0400
Subject: [PATCH 1/5] Do stringprep, as specified by the RFC, on JID object creation
---
DJabberd/Makefile.PL | 1 +
DJabberd/lib/DJabberd/JID.pm | 89 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 89 insertions(+), 1 deletions(-)
diff --git a/DJabberd/Makefile.PL b/DJabberd/Makefile.PL
index 6de3e23..cfc06e2 100644
--- a/DJabberd/Makefile.PL
+++ b/DJabberd/Makefile.PL
@@ -14,6 +14,7 @@ WriteMakefile(
'Net::SSLeay' => 0,
'Log::Log4perl' => 0,
'Digest::HMAC_SHA1' => 0,
+ 'Unicode::Stringprep' => 0,
},
clean => { FILES => 't/log/*' },
AUTHOR => 'Brad Fitzpatrick <brad@danga.com>',
diff --git a/DJabberd/lib/DJabberd/JID.pm b/DJabberd/lib/DJabberd/JID.pm
index 3959ee5..f37e29c 100644
--- a/DJabberd/lib/DJabberd/JID.pm
+++ b/DJabberd/lib/DJabberd/JID.pm
@@ -13,6 +13,84 @@ use constant AS_STRING => 3;
use constant AS_BSTRING => 4;
use constant AS_STREXML => 5;
+# Stringprep functions for converting to canonical form
+use Unicode::Stringprep;
+use Unicode::Stringprep::Mapping;
+use Unicode::Stringprep::Prohibited;
+my $nodeprep = Unicode::Stringprep->new(
+ 3.2,
+ [
+ \@Unicode::Stringprep::Mapping::B1,
+ \@Unicode::Stringprep::Mapping::B2,
+ ],
+ 'KC',
+ [
+ \@Unicode::Stringprep::Prohibited::C11,
+ \@Unicode::Stringprep::Prohibited::C12,
+ \@Unicode::Stringprep::Prohibited::C21,
+ \@Unicode::Stringprep::Prohibited::C22,
+ \@Unicode::Stringprep::Prohibited::C3,
+ \@Unicode::Stringprep::Prohibited::C4,
+ \@Unicode::Stringprep::Prohibited::C5,
+ \@Unicode::Stringprep::Prohibited::C6,
+ \@Unicode::Stringprep::Prohibited::C7,
+ \@Unicode::Stringprep::Prohibited::C8,
+ \@Unicode::Stringprep::Prohibited::C9,
+ [
+ 0x0022, undef, # "
+ 0x0026, undef, # &
+ 0x0027, undef, # '
+ 0x002F, undef, # /
+ 0x003A, undef, # :
+ 0x003C, undef, # <
+ 0x003E, undef, # >
+ 0x0040, undef, # @
+ ]
+ ],
+ 1,
+);
+my $nameprep = Unicode::Stringprep->new(
+ 3.2,
+ [
+ \@Unicode::Stringprep::Mapping::B1,
+ \@Unicode::Stringprep::Mapping::B2,
+ ],
+ 'KC',
+ [
+ \@Unicode::Stringprep::Prohibited::C12,
+ \@Unicode::Stringprep::Prohibited::C22,
+ \@Unicode::Stringprep::Prohibited::C3,
+ \@Unicode::Stringprep::Prohibited::C4,
+ \@Unicode::Stringprep::Prohibited::C5,
+ \@Unicode::Stringprep::Prohibited::C6,
+ \@Unicode::Stringprep::Prohibited::C7,
+ \@Unicode::Stringprep::Prohibited::C8,
+ \@Unicode::Stringprep::Prohibited::C9,
+ ],
+ 1,
+);
+my $resourceprep = Unicode::Stringprep->new(
+ 3.2,
+ [
+ \@Unicode::Stringprep::Mapping::B1,
+ ],
+ 'KC',
+ [
+ \@Unicode::Stringprep::Prohibited::C12,
+ \@Unicode::Stringprep::Prohibited::C21,
+ \@Unicode::Stringprep::Prohibited::C22,
+ \@Unicode::Stringprep::Prohibited::C3,
+ \@Unicode::Stringprep::Prohibited::C4,
+ \@Unicode::Stringprep::Prohibited::C5,
+ \@Unicode::Stringprep::Prohibited::C6,
+ \@Unicode::Stringprep::Prohibited::C7,
+ \@Unicode::Stringprep::Prohibited::C8,
+ \@Unicode::Stringprep::Prohibited::C9,
+ ],
+ 1,
+);
+
+
# returns DJabberd::JID object, or undef on failure due to invalid format
sub new {
#my ($class, $jidstring) = @_;
@@ -29,7 +107,16 @@ sub new {
(?: /(.{1,1023}) )? # $3: optional resource
$!x;
- return bless [ $1, $2, $3 ], $_[0];
+ # Stringprep uses regexes, so store these away first
+ my ($node, $host, $res) = ($1, $2, $3);
+
+ return eval {
+ bless [
+ defined $node ? $nodeprep->($node) : undef,
+ $nameprep->($host),
+ defined $res ? $resourceprep->($res) : undef,
+ ], $_[0]
+ };
}
sub is_bare {
--
1.6.3.3.473.gb74fc4.dirty
["0002-Lower-case-all-domains-for-checking-verification-sta.patch" (0002-Lower-case-all-domains-for-checking-verification-sta.patch)]
> From 81a71a37d10546f1ecae8dd09e878114ecb45a29 Mon Sep 17 00:00:00 2001
From: Alex Vandiver <alexmv@bestpractical.com>
Date: Thu, 9 Apr 2009 13:17:42 -0400
Subject: [PATCH 2/5] Lower-case all domains for checking verification status
---
DJabberd/lib/DJabberd/Connection/ServerIn.pm | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/DJabberd/lib/DJabberd/Connection/ServerIn.pm \
b/DJabberd/lib/DJabberd/Connection/ServerIn.pm index 32c7599..d7ca323 100644
--- a/DJabberd/lib/DJabberd/Connection/ServerIn.pm
+++ b/DJabberd/lib/DJabberd/Connection/ServerIn.pm
@@ -16,7 +16,7 @@ sub set_vhost {
sub peer_domain_is_verified {
my ($self, $domain) = @_;
- return $self->{verified_remote_domain}->{$domain};
+ return $self->{verified_remote_domain}->{lc $domain};
}
sub on_stream_start {
@@ -140,7 +140,7 @@ sub dialback_result_valid {
my %opts = @_;
my $res = qq{<db:result from='$opts{recv_server}' to='$opts{orig_server}' \
type='valid'/>};
- $self->{verified_remote_domain}->{$opts{orig_server}} = $opts{orig_server};
+ $self->{verified_remote_domain}->{lc $opts{orig_server}} = $opts{orig_server};
$self->log->debug("Dialback result valid for connection $self->{id}. \
from=$opts{recv_server}, to=$opts{orig_server}: $res\n"); $self->write($res);
}
--
1.6.3.3.473.gb74fc4.dirty
["0003-Add-global-CaseSensitive-flag-for-backwards-compatib.patch" (0003-Add-global-CaseSensitive-flag-for-backwards-compatib.patch)]
>From e327279d56e61e9c84ff393d03c13ae0ee144850 Mon Sep 17 00:00:00 2001
From: Alex Vandiver <alexmv@bestpractical.com>
Date: Fri, 3 Apr 2009 17:47:36 -0400
Subject: [PATCH 3/5] Add global CaseSensitive flag for backwards compatibility
---
DJabberd/lib/DJabberd.pm | 5 +++++
DJabberd/lib/DJabberd/JID.pm | 7 +++++++
2 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/DJabberd/lib/DJabberd.pm b/DJabberd/lib/DJabberd.pm
index 9debd3d..a168fdb 100644
--- a/DJabberd/lib/DJabberd.pm
+++ b/DJabberd/lib/DJabberd.pm
@@ -167,6 +167,11 @@ sub fake_s2s_peer {
return $fake_peers{$host};
}
+sub set_config_casesensitive {
+ my ($self, $val) = @_;
+ $DJabberd::JID::CASE_SENSITIVE = as_bool($val);
+}
+
sub add_vhost {
my ($self, $vhost) = @_;
my $sname = lc $vhost->name;
diff --git a/DJabberd/lib/DJabberd/JID.pm b/DJabberd/lib/DJabberd/JID.pm
index f37e29c..bfef24c 100644
--- a/DJabberd/lib/DJabberd/JID.pm
+++ b/DJabberd/lib/DJabberd/JID.pm
@@ -3,6 +3,9 @@ use strict;
use DJabberd::Util qw(exml);
use Digest::SHA1;
+# Configurable via 'CaseSensitive' config option
+our $CASE_SENSITIVE = 0;
+
use overload
'""' => \&as_string_exml;
@@ -107,6 +110,10 @@ sub new {
(?: /(.{1,1023}) )? # $3: optional resource
$!x;
+ # If we're in case-sensitive mode, for backwards-compatibility,
+ # then skip stringprep
+ return bless [ $1, $2, $3 ], $_[0] if $DJabberd::JID::CASE_SENSITIVE;
+
# Stringprep uses regexes, so store these away first
my ($node, $host, $res) = ($1, $2, $3);
--
1.6.3.3.473.gb74fc4.dirty
["0004-Add-tool-to-unify-mixed-case-records-in-existing-SQL.patch" (0004-Add-tool-to-unify-mixed-case-records-in-existing-SQL.patch)]
> From 2996bf98e3597ce928f2cd4e826542621c83dd76 Mon Sep 17 00:00:00 2001
From: Alex Vandiver <alexmv@bestpractical.com>
Date: Thu, 24 Sep 2009 17:35:42 -0400
Subject: [PATCH 4/5] Add tool to unify mixed-case records in existing SQLite rosters
---
.../fix-jabber-roster-case | 176 ++++++++++++++++++++
1 files changed, 176 insertions(+), 0 deletions(-)
create mode 100755 DJabberd-RosterStorage-SQLite/fix-jabber-roster-case
diff --git a/DJabberd-RosterStorage-SQLite/fix-jabber-roster-case \
b/DJabberd-RosterStorage-SQLite/fix-jabber-roster-case new file mode 100755
index 0000000..e3760f8
--- /dev/null
+++ b/DJabberd-RosterStorage-SQLite/fix-jabber-roster-case
@@ -0,0 +1,176 @@
+#!/usr/bin/perl
+
+use DJabberd::JID;
+use DJabberd::Subscription;
+use DBI;
+use strict;
+use warnings;
+
+die "$0 must be run with a case-folding version of DJabberd\n"
+ unless DJabberd::JID->new('TEST@example.com')->eq(DJabberd::JID->new('test@example.com'));
+
+my $roster = shift @ARGV;
+die "Usage: $0 rosterfile\n" unless defined $roster and -f $roster;
+
+# Note that all work is performed in a transaction for consistency,
+# hence AutoCommit => 0
+my $dbh = DBI->connect(
+ "dbi:SQLite:dbname=$roster", "", "",
+ { RaiseError => 1, PrintError => 0, AutoCommit => 0 }
+);
+
+my $all_dups = $dbh->selectcol_arrayref(<<SQL);
+SELECT LOWER(first.jid)
+ FROM jidmap first
+ JOIN jidmap other
+ ON LOWER(first.jid) = LOWER(other.jid)
+ LEFT JOIN jidmap later
+ ON LOWER(later.jid) = LOWER(first.jid)
+ AND later.jidid > first.jidid
+ GROUP BY first.jid
+HAVING COUNT(DISTINCT other.jid) > 1
+ AND later.jid IS NULL
+ ORDER BY COUNT(DISTINCT other.jid), LOWER(first.jid)
+SQL
+
+for my $name (@{$all_dups}) {
+ warn "Looking at $name\n";
+
+ # Grab the set of dups
+ my $dups = $dbh->selectall_arrayref(<<SQL, undef, $name);
+SELECT jidid, jid FROM jidmap WHERE LOWER(jid) = ?
+SQL
+
+ # For later use, build up the set of JIDs ids to coalesce.
+ my $jidset = "(" . join(",", map {$_->[0]} @$dups) .")";
+ warn " JID equivalence set is $jidset\n";
+
+ # Since this is the output of LOWER(jid), it is _probably_ already
+ # correct, but we run it through canonicalize to get KC
+ # normalization, etc
+ $name = canonicalize($name);
+ next unless defined $name;
+ warn " Canonical form is $name\n";
+
+ # Blow away all of the duplicate JIDs, and re-insert the new name
+ $dbh->do("DELETE FROM jidmap WHERE jidid IN $jidset");
+ $dbh->do("INSERT INTO jidmap(jid) VALUES(?)", undef, $name);
+ my $newid = $dbh->last_insert_id(undef, undef, "jidmap", "jidid");
+ warn " New row id is $newid\n";
+
+ # Next, find all of the places in the roster where the old IDs
+ # showed up, either as userids or contactids
+ my %types = (userid => "contactid", contactid => "userid");
+ for my $column (keys %types) {
+ warn " Looking for occurrances in $column\n";
+ my $other = $types{$column};
+ # We now generate a list of contacts that any of the JIDs as
+ # the user, (or users who have any of the JIDs as a contact)
+ my $otherlist = $dbh->selectcol_arrayref(<<SQL);
+SELECT $other FROM roster WHERE $column IN $jidset GROUP BY $other
+SQL
+ for my $otherval (@$otherlist) {
+ my $merge = $dbh->selectall_arrayref(<<SQL, { Slice => {} });
+SELECT $column, name, subscription FROM roster WHERE $other = $otherval AND $column \
IN $jidset ORDER BY $column ASC +SQL
+ # We now have a set of information to merge together, in $merge
+ warn " $otherval references JIDs @{[map $_->{$column}, @$merge]} in \
$column\n"; + $dbh->do(<<SQL, undef, $newid, $otherval, merge(@$merge));
+INSERT INTO roster($column, $other, name, subscription) VALUES(?, ?, ?, ?)
+SQL
+ }
+ }
+
+ # Delete any references to old JIDs in the roster
+ $dbh->do("DELETE FROM roster WHERE contactid IN $jidset OR userid IN $jidset");
+
+ # Merge roster groups
+ my $grouplist = $dbh->selectcol_arrayref(<<SQL);
+SELECT name FROM rostergroup WHERE userid IN $jidset GROUP BY name
+SQL
+ for my $groupname (@$grouplist) {
+ # Make a new group of this name for the new JID's id
+ $dbh->do("INSERT INTO rostergroup(userid, name) VALUES(?, ?)", undef, \
$newid, $groupname); + my $groupid = $dbh->last_insert_id(undef, undef, \
"rostergroup", "groupid"); + # Find all of the distinct contacts in groups of \
this name in + # any of the old JIDs, and add them to the new group
+ my $merge = $dbh->do(<<SQL, undef, $groupid, $groupname);
+INSERT INTO groupitem(groupid, contactid)
+SELECT ?, groupitem.contactid
+ FROM rostergroup
+ JOIN groupitem
+ ON rostergroup.groupid = groupitem.groupid
+ WHERE rostergroup.name = ?
+ AND rostergroup.userid IN $jidset
+ GROUP BY groupitem.contactid
+SQL
+ }
+ # Remove the old groups
+ $dbh->do("DELETE FROM rostergroup WHERE userid IN $jidset");
+
+ # Look for places the any of the old JIDs appeared in other roster
+ # groups, and replace them with the new JID's id
+ $dbh->do(<<SQL, undef, $newid);
+INSERT INTO groupitem(groupid, contactid)
+SELECT groupid, ?
+ FROM groupitem
+ WHERE contactid IN $jidset
+ GROUP BY groupid
+SQL
+ # Remove the old contacts from the groups
+ $dbh->do("DELETE FROM groupitem WHERE contactid IN $jidset");
+
+}
+
+# The above merely handles cases where one JID appears twice; now we
+# iterate though _all_ names, and lower-case them, to catch JIDs which
+# are incorrectly cased but only appear once.
+my $all = $dbh->selectall_arrayref("SELECT jid, jidid FROM jidmap", { Slice => {} \
}); +for my $row (@{$all}) {
+ # Canonicalize the name. Since this is the output of LOWER(jid),
+ # it is _probably_ already correct, but we round-trip through
+ # DJabberd::JID to ensure we get KC normalization, and the like
+ my $name = canonicalize($row->{jid});
+ next unless defined $name;
+ next if $name eq $row->{jid};
+ warn "Looking at @{[$row->{jid}]}\n";
+ warn " Canonical form is $name\n";
+ $dbh->do("UPDATE jidmap SET jid = ? WHERE jidid = ?", undef, $name, \
$row->{jidid}); +}
+
+$dbh->commit or die "Error commiting changes: ".$dbh->errstr;
+
+
+
+
+sub canonicalize {
+ # Canonicalize the name. We round-trip through DJabberd::JID to
+ # ensure we get KC normalization, case folding, and the like
+ my $name = shift;
+ my $jid = DJabberd::JID->new($name);
+ unless ($jid) {
+ warn "Can't make canonical form of $name!";
+ return undef;
+ }
+ return $jid->as_string;
+}
+
+sub merge {
+ my (@merge) = @_;
+
+ # Trivial case
+ return ($merge[0]->{name}, $merge[0]->{subscription})
+ if @merge == 1;
+
+ # More complex case; name is arbitrarily picked to be first non-null name, or \
null + my($name) = grep {defined} map {$_->{name}} @merge;
+ @merge = map {DJabberd::Subscription->from_bitmask($_->{subscription})} @merge;
+
+ # to overrides pendout, from overrides pendin
+ my $merged = DJabberd::Subscription->new;
+ $merged->{to} = 1 if grep {$_->{to}} @merge;
+ $merged->{pendout} = 1 if not $merged->{to} and grep {$_->{pendout}} @merge;
+ $merged->{from} = 1 if grep {$_->{from}} @merge;
+ $merged->{pendin} = 1 if not $merged->{from} and grep {$_->{pendin}} @merge;
+ return $name, $merged->as_bitmask;
+}
--
1.6.3.3.473.gb74fc4.dirty
["0005-Update-CHANGES-file.patch" (0005-Update-CHANGES-file.patch)]
>From f2431a189c3282f4ef805548e53d182387c345f2 Mon Sep 17 00:00:00 2001
From: Alex Vandiver <alex@chmrr.net>
Date: Fri, 25 Sep 2009 14:21:34 -0400
Subject: [PATCH 5/5] Update CHANGES file
---
DJabberd/CHANGES | 9 +++++++++
1 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/DJabberd/CHANGES b/DJabberd/CHANGES
index cc4ca2c..155a9dd 100644
--- a/DJabberd/CHANGES
+++ b/DJabberd/CHANGES
@@ -1,3 +1,12 @@
+ - IMPORTANT: JIDs are now case insensitive, and properly UTF8
+ normalized, as the spec calls for. If you wish to preserve the
+ old behavior, set 'CaseSensitive' in your configuration. If you
+ have existing roster storage, you will need to ensure that
+ existing entries with differing case are merged. If you use the
+ SQLite roster, 'fix-jabber-roster-case' has been provided in the
+ DJabberd::RosterStorage::SQLite distribution.
+ (Alex Vandiver <alexmv@bestpractical.com>)
+
- Some configuration file docs
(Alex Vandiver <alexmv@bestpractical.com>)
--
1.6.3.3.473.gb74fc4.dirty
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic