[prev in list] [next in list] [prev in thread] [next in thread] 

List:       spamassassin-devel
Subject:    [PATCH] Allow SQLite use for Bayes
From:       Matt Corallo <saaul () mattcorallo ! com>
Date:       2021-09-22 22:28:13
Message-ID: 596719a8-15b0-72e8-9ede-94a29074dc54 () bluematt ! me
[Download RAW message or body]

(note: not subscribed, please CC on responses)

SQLite does not support the RPAD function. Instead, we do it manually with sprintf.

Note that I do not know very much Perl, so this is probably not the ideal way to do it, but this 
does seem to work quite well in my own testing.

--- SQL_orig.pm	2021-09-22 19:41:01.724517821 +0000
+++ /usr/share/perl5/Mail/SpamAssassin/BayesStore/SQL.pm	2021-09-22 19:46:54.061166896 +0000
@@ -629,9 +629,7 @@
    return unless (defined($self->{_dbh}));

    # 0/0 tokens don't count, but in theory we shouldn't have any
-  my $token_select = $self->_token_select_string();
-
-  my $sql = "SELECT $token_select, spam_count, ham_count, atime
+  my $sql = "SELECT token, spam_count, ham_count, atime
                 FROM bayes_token
                WHERE id = ?
                  AND (spam_count > 0 OR ham_count > 0)";
@@ -650,7 +648,8 @@
      return;
    }

-  while (my ($token, $spam_count, $ham_count, $atime) = $sth->fetchrow_array()) {
+  while (my ($token_empty, $spam_count, $ham_count, $atime) = $sth->fetchrow_array()) {
+    my $token = sprintf "%-5s", $token_empty;
      my $prob = $self->{bayes}->_compute_prob_for_token($token, $vars[1], $vars[2],
  						      $spam_count, $ham_count);
      $prob ||= 0.5;
@@ -863,9 +862,7 @@
    my $results_index = 0;
    my $bunch_end;

-  my $token_select = $self->_token_select_string();
-
-  my $multi_sql = "SELECT $token_select, spam_count, ham_count, atime
+  my $multi_sql = "SELECT token, spam_count, ham_count, atime
                       FROM bayes_token
                      WHERE id = ?
                        AND token IN ";
@@ -914,6 +911,7 @@

        foreach my $result (@{$results}) {
  	# Make sure that spam_count and ham_count are not negative
+	$result->[0] = sprintf "%-5s", $result->[0];
  	$result->[1] = 0 if (!$result->[1] || $result->[1] < 0);
  	$result->[2] = 0 if (!$result->[2] || $result->[2] < 0);
  	# Make sure that atime has a value
@@ -1341,9 +1339,7 @@
    print "v\t$num_spam\tnum_spam\n"    or die "Error writing: $!";
    print "v\t$num_ham\tnum_nonspam\n"  or die "Error writing: $!";

-  my $token_select = $self->_token_select_string();
-
-  my $token_sql = "SELECT spam_count, ham_count, atime, $token_select
+  my $token_sql = "SELECT spam_count, ham_count, atime, token
                       FROM bayes_token
                      WHERE id = ?
                        AND (spam_count > 0 OR ham_count > 0)";
@@ -1367,7 +1363,8 @@
    }

    while (my @values = $sth->fetchrow_array()) {
-    $values[3] = unpack("H*", $values[3]);
+    my $token = sprintf "%-5s", $values[3];
+    $values[3] = unpack("H*", $token);
      print "t\t" . join("\t", @values) . "\n"
        or die "Error writing: $!";
    }
@@ -2340,22 +2337,6 @@
    return $num_lowfreq;
  }

-=head2 _token_select_string
-
-private instance (String) _token_select_string
-
-Description:
-This method returns the string to be used in SELECT statements to represent
-the token column.
-
-The default is to use the RPAD function to pad the token out to 5 characters.
-
-=cut
-
-sub _token_select_string {
-  return "RPAD(token, 5, ' ')";
-}
-
  sub sa_die { Mail::SpamAssassin::sa_die(@_); }

  1;
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic