[prev in list] [next in list] [prev in thread] [next in thread]
List: spamassassin-devel
Subject: [PATCH] Allow SQLite use for Bayes
From: Matt Corallo <saaul () mattcorallo ! com>
Date: 2021-09-22 22:28:13
Message-ID: 596719a8-15b0-72e8-9ede-94a29074dc54 () bluematt ! me
[Download RAW message or body]
(note: not subscribed, please CC on responses)
SQLite does not support the RPAD function. Instead, we do it manually with sprintf.
Note that I do not know very much Perl, so this is probably not the ideal way to do it, but this
does seem to work quite well in my own testing.
--- SQL_orig.pm 2021-09-22 19:41:01.724517821 +0000
+++ /usr/share/perl5/Mail/SpamAssassin/BayesStore/SQL.pm 2021-09-22 19:46:54.061166896 +0000
@@ -629,9 +629,7 @@
return unless (defined($self->{_dbh}));
# 0/0 tokens don't count, but in theory we shouldn't have any
- my $token_select = $self->_token_select_string();
-
- my $sql = "SELECT $token_select, spam_count, ham_count, atime
+ my $sql = "SELECT token, spam_count, ham_count, atime
FROM bayes_token
WHERE id = ?
AND (spam_count > 0 OR ham_count > 0)";
@@ -650,7 +648,8 @@
return;
}
- while (my ($token, $spam_count, $ham_count, $atime) = $sth->fetchrow_array()) {
+ while (my ($token_empty, $spam_count, $ham_count, $atime) = $sth->fetchrow_array()) {
+ my $token = sprintf "%-5s", $token_empty;
my $prob = $self->{bayes}->_compute_prob_for_token($token, $vars[1], $vars[2],
$spam_count, $ham_count);
$prob ||= 0.5;
@@ -863,9 +862,7 @@
my $results_index = 0;
my $bunch_end;
- my $token_select = $self->_token_select_string();
-
- my $multi_sql = "SELECT $token_select, spam_count, ham_count, atime
+ my $multi_sql = "SELECT token, spam_count, ham_count, atime
FROM bayes_token
WHERE id = ?
AND token IN ";
@@ -914,6 +911,7 @@
foreach my $result (@{$results}) {
# Make sure that spam_count and ham_count are not negative
+ $result->[0] = sprintf "%-5s", $result->[0];
$result->[1] = 0 if (!$result->[1] || $result->[1] < 0);
$result->[2] = 0 if (!$result->[2] || $result->[2] < 0);
# Make sure that atime has a value
@@ -1341,9 +1339,7 @@
print "v\t$num_spam\tnum_spam\n" or die "Error writing: $!";
print "v\t$num_ham\tnum_nonspam\n" or die "Error writing: $!";
- my $token_select = $self->_token_select_string();
-
- my $token_sql = "SELECT spam_count, ham_count, atime, $token_select
+ my $token_sql = "SELECT spam_count, ham_count, atime, token
FROM bayes_token
WHERE id = ?
AND (spam_count > 0 OR ham_count > 0)";
@@ -1367,7 +1363,8 @@
}
while (my @values = $sth->fetchrow_array()) {
- $values[3] = unpack("H*", $values[3]);
+ my $token = sprintf "%-5s", $values[3];
+ $values[3] = unpack("H*", $token);
print "t\t" . join("\t", @values) . "\n"
or die "Error writing: $!";
}
@@ -2340,22 +2337,6 @@
return $num_lowfreq;
}
-=head2 _token_select_string
-
-private instance (String) _token_select_string
-
-Description:
-This method returns the string to be used in SELECT statements to represent
-the token column.
-
-The default is to use the RPAD function to pad the token out to 5 characters.
-
-=cut
-
-sub _token_select_string {
- return "RPAD(token, 5, ' ')";
-}
-
sub sa_die { Mail::SpamAssassin::sa_die(@_); }
1;
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic