'[kupu-checkins] r11669 - in kupu/trunk/kupu: cgi common'

[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kupu-checkins
Subject:    [kupu-checkins] r11669 - in kupu/trunk/kupu: cgi common
From:       guido () codespeak ! net
Date:       2005-04-30 23:12:02
Message-ID: 20050430231202.E009C27BBA () code1 ! codespeak ! net
[Download RAW message or body]

Author: guido
Date: Sun May  1 01:12:02 2005
New Revision: 11669

Modified:
   kupu/trunk/kupu/cgi/spellcheck.cgi
   kupu/trunk/kupu/common/kupuspellchecker.js
Log:
Changed the spellchecking feature a bit: instead of plain text, the CGI script
now returns an XML document that contains replacement hints in addition to
information about which words are incorrect.


Modified: kupu/trunk/kupu/cgi/spellcheck.cgi
==============================================================================
--- kupu/trunk/kupu/cgi/spellcheck.cgi	(original)
+++ kupu/trunk/kupu/cgi/spellcheck.cgi	Sun May  1 01:12:02 2005
@@ -2,44 +2,87 @@
 
 """SpellChecker for Kupu"""
 
-COMMAND = 'aspell -l'
+COMMAND = 'aspell -a'
 
 import popen2, re
 
 class SpellChecker:
     """Simple spell checker, uses ispell (or aspell) with pipes"""
 
-    reg_split = re.compile("[^\w']+", re.UNICODE)
+    reg_unknown = re.compile('^& ([\w\']*) \d* \d*: (.*)$', re.U)
 
-    def __init__(self, text):
+    def __init__(self):
         self.chout, self.chin = popen2.popen2(COMMAND)
-        self.text = text
-        self.result = None
+        # throw away intro
+        self.read_line()
 
-    def check(self):
+    def __del__(self):
+        self.chout.close()
+        self.chin.close()
+
+    def check(self, text):
         """checks a line of text
         
             returns None if spelling was okay, and an HTML string with words 
             that weren't recognized marked (with a span class="wrong_spelling")
         """
-        if self.result is None:
-            # send it to the child app
-            self.chin.write(self.text)
+        result = {}
+        for line in text.split('\n'):
+            line = line.strip()
+            if line:
+                self.write_line(line)
+            while 1:
+                resline = self.read_line()
+                if not resline.strip():
+                    break
+                if resline.strip() != '*':
+                    match = self.reg_unknown.match(resline)
+                    assert match, 'Unknown formatted line: %s' % resline
+                    word = match.group(1)
+                    if result.has_key(word):
+                        continue
+                    replacements = match.group(2).split(', ')
+                    result[word] = replacements
+        return result
+
+    def read_line(self):
+        buf = []
+        while 1:
+            char = self.read_char()
+            if char == '\n':
+                return ''.join(buf)
+            buf.append(char)
+
+    def write_line(self, line):
+        try:
+            self.chin.write('%s\n' % line)
             self.chin.flush()
-            # close in (this makes the app spew out the result)
-            self.chin.close()
-            
-            # read the result
-            result = self.chout.read()
-            result = ' '.join(result.split())
-            if not result:
-                result = None
+            return
+        except IOError:
+            self.reconnect()
+            self.chin.write('%s\n' % line)
+            self.chin.flush()
+            return
+        raise
 
-            # close out
+    def read_char(self):
+        try:
+            return self.chout.read(1)
+        except IOError:
+            self.reconnect()
+            return self.chout.read(1)
+        raise
+
+    def reconnect(self):
+        try:
             self.chout.close()
-            
-            self.result = result
-        return self.result
+        except IOError:
+            pass
+        try:
+            self.chin.close()
+        except IOError:
+            pass
+        self.chout, self.chin = popen2.popen2(COMMAND)
 
 def is_cgi():
     import os
@@ -47,28 +90,43 @@
         return True
     return False
 
-if is_cgi():
-    import cgi, cgitb
-    #cgitb.enable()
-    #result = repr(sys.stdin.read())
-    data = cgi.FieldStorage()
-    data = data['text'].value
-    c = SpellChecker(data)
-    result = c.check()
-    if result == None:
-        result = ''
-    print 'Content-Type: text/plain'
-    print 'Content-Length: %s' % len(result)
-    print
-    print result
-elif __name__ == '__main__':
-    while 1:
-        line = raw_input('Enter text to check: ')
-        if line == 'q':
-            break
-        c = SpellChecker(line)
-        ret = c.check()
-        if ret is None:
-            print 'okay'
+def format_result(result):
+    """convert the result dict to XML"""
+    buf = ['<?xml version="1.0" encoding="UTF-8" ?>\n<spellcheck_result>']
+    for key, value in result.items():
+        buf.append('<incorrect><word>')
+        buf.append(key)
+        buf.append('</word><replacements>')
+        buf.append(' '.join(value))
+        buf.append('</replacements></incorrect>')
+    buf.append('</spellcheck_result>')
+    return ''.join(buf)
+
+if __name__ == '__main__':
+    if is_cgi():
+        import cgi, cgitb
+        #cgitb.enable()
+        #result = repr(sys.stdin.read())
+        data = cgi.FieldStorage()
+        data = data['text'].value
+        c = SpellChecker()
+        result = c.check(data)
+        if result == None:
+            result = ''
         else:
-            print ret
+            result = format_result(result)
+        print 'Content-Type: text/xml,charset=UTF-8'
+        print 'Content-Length: %s' % len(result)
+        print
+        print result
+    else:
+        c = SpellChecker()
+        while 1:
+            line = raw_input('Enter text to check: ')
+            if line == 'q':
+                break
+            ret = c.check(line)
+            if ret is None:
+                print 'okay'
+            else:
+                print ret

Modified: kupu/trunk/kupu/common/kupuspellchecker.js
==============================================================================
--- kupu/trunk/kupu/common/kupuspellchecker.js	(original)
+++ kupu/trunk/kupu/common/kupuspellchecker.js	Sun May  1 01:12:02 2005
@@ -31,11 +31,16 @@
 
 KupuSpellChecker.prototype.stateChangeHandler = function(request) {
     if (request.readyState == 4) {
-        var result = request.responseText;
-        if (!result) {
-            alert('There were no errors.');
+        if (request.status == '200') {
+            var result = request.responseXML;
+            result = this.xmlToMapping(result);
+            if (!result) {
+                alert('There were no errors.');
+            } else {
+                this.displayUnrecognized(result);
+            };
         } else {
-            this.displayUnrecognized(result);
+            alert('Error loading data, status ' + request.status);
         };
     };
 };
@@ -60,7 +65,7 @@
     return bits.join(' ');
 };
 
-KupuSpellChecker.prototype.displayUnrecognized = function(words) {
+KupuSpellChecker.prototype.displayUnrecognized = function(mapping) {
     // copy the current editable document into a new window
     var doc = this.editor.getInnerDocument().documentElement;
     var win = window.open('kupublank.html', 'spellchecker', 
@@ -70,12 +75,11 @@
     var html = doc.innerHTML;
     win.document.write('<html>' + doc.innerHTML + '</html>');
     win.document.close();
-    addEventHandler(win, 'load', this.continueDisplay, this, win, words);
+    win.deentitize = function(str) {return str.deentitize()};
+    addEventHandler(win, 'load', this.continueDisplay, this, win, mapping);
 };
 
-KupuSpellChecker.prototype.continueDisplay = function(win, words) {
-    words = words.split(' ').removeDoubles();
-
+KupuSpellChecker.prototype.continueDisplay = function(win, mapping) {
     // walk through all elements of the body, colouring the text nodes
     var body = win.document.getElementsByTagName('body')[0];
     var iterator = new NodeIterator(body);
@@ -88,7 +92,7 @@
         if (node.nodeType == 3) {
             var span = win.document.createElement('span');
             var before = node.nodeValue;
-            var after = this.colourText(before, words);
+            var after = this.colourText(before, mapping);
             if (before != after) {
                 span.innerHTML = after;
                 var last = span.lastChild;
@@ -104,11 +108,14 @@
     };
 };
 
-KupuSpellChecker.prototype.colourText = function(text, words) {
+KupuSpellChecker.prototype.colourText = function(text, mapping) {
     var currtext = text;
     var newtext = '';
-    for (var i=0; i < words.length; i++) {
-        var reg = new RegExp('([^\w])(' + words[i] + ')([^\w])');
+    for (var word in mapping) {
+        var replacements = mapping[word];
+        replacements = replacements.entitize();
+        replacements = replacements.replace("'", "\\'", 'g');
+        var reg = new RegExp('([^\w])(' + word + ')([^\w])');
         while (true) {
             var match = reg.exec(currtext);
             if (!match) {
@@ -120,7 +127,10 @@
             var m = match[0];
             newtext += currtext.substr(0, currtext.indexOf(m));
             newtext += match[1] +
-                        '<span style="' + this.spanstyle + '">' +
+                        '<span style="' + this.spanstyle + '" ' +
+                        'onclick="alert(deentitize(\'' + 
+                        replacements + '\'));" ' +
+                        'title="' + replacements + '">' +
                         match[2] +
                         '</span>' +
                         match[3];
@@ -129,3 +139,15 @@
     };
     return currtext;
 };
+
+KupuSpellChecker.prototype.xmlToMapping = function(docnode) {
+    var docel = docnode.documentElement;
+    var result = {};
+    var incorrect = docel.getElementsByTagName('incorrect');
+    for (var i=0; i < incorrect.length; i++) {
+        var word = incorrect[i].firstChild.firstChild.nodeValue;
+        var replacements = incorrect[i].lastChild.firstChild.nodeValue;
+        result[word] = replacements;
+    };
+    return result;
+};

[prev in list] [next in list] [prev in thread] [next in thread]
Configure | About | News | Add a list | Sponsored by KoreLogic