[prev in list] [next in list] [prev in thread] [next in thread]
List: kupu-checkins
Subject: [kupu-checkins] r11669 - in kupu/trunk/kupu: cgi common
From: guido () codespeak ! net
Date: 2005-04-30 23:12:02
Message-ID: 20050430231202.E009C27BBA () code1 ! codespeak ! net
[Download RAW message or body]
Author: guido
Date: Sun May 1 01:12:02 2005
New Revision: 11669
Modified:
kupu/trunk/kupu/cgi/spellcheck.cgi
kupu/trunk/kupu/common/kupuspellchecker.js
Log:
Changed the spellchecking feature a bit: instead of plain text, the CGI script
now returns an XML document that contains replacement hints in addition to
information about which words are incorrect.
Modified: kupu/trunk/kupu/cgi/spellcheck.cgi
==============================================================================
--- kupu/trunk/kupu/cgi/spellcheck.cgi (original)
+++ kupu/trunk/kupu/cgi/spellcheck.cgi Sun May 1 01:12:02 2005
@@ -2,44 +2,87 @@
"""SpellChecker for Kupu"""
-COMMAND = 'aspell -l'
+COMMAND = 'aspell -a'
import popen2, re
class SpellChecker:
"""Simple spell checker, uses ispell (or aspell) with pipes"""
- reg_split = re.compile("[^\w']+", re.UNICODE)
+ reg_unknown = re.compile('^& ([\w\']*) \d* \d*: (.*)$', re.U)
- def __init__(self, text):
+ def __init__(self):
self.chout, self.chin = popen2.popen2(COMMAND)
- self.text = text
- self.result = None
+ # throw away intro
+ self.read_line()
- def check(self):
+ def __del__(self):
+ self.chout.close()
+ self.chin.close()
+
+ def check(self, text):
"""checks a line of text
returns None if spelling was okay, and an HTML string with words
that weren't recognized marked (with a span class="wrong_spelling")
"""
- if self.result is None:
- # send it to the child app
- self.chin.write(self.text)
+ result = {}
+ for line in text.split('\n'):
+ line = line.strip()
+ if line:
+ self.write_line(line)
+ while 1:
+ resline = self.read_line()
+ if not resline.strip():
+ break
+ if resline.strip() != '*':
+ match = self.reg_unknown.match(resline)
+ assert match, 'Unknown formatted line: %s' % resline
+ word = match.group(1)
+ if result.has_key(word):
+ continue
+ replacements = match.group(2).split(', ')
+ result[word] = replacements
+ return result
+
+ def read_line(self):
+ buf = []
+ while 1:
+ char = self.read_char()
+ if char == '\n':
+ return ''.join(buf)
+ buf.append(char)
+
+ def write_line(self, line):
+ try:
+ self.chin.write('%s\n' % line)
self.chin.flush()
- # close in (this makes the app spew out the result)
- self.chin.close()
-
- # read the result
- result = self.chout.read()
- result = ' '.join(result.split())
- if not result:
- result = None
+ return
+ except IOError:
+ self.reconnect()
+ self.chin.write('%s\n' % line)
+ self.chin.flush()
+ return
+ raise
- # close out
+ def read_char(self):
+ try:
+ return self.chout.read(1)
+ except IOError:
+ self.reconnect()
+ return self.chout.read(1)
+ raise
+
+ def reconnect(self):
+ try:
self.chout.close()
-
- self.result = result
- return self.result
+ except IOError:
+ pass
+ try:
+ self.chin.close()
+ except IOError:
+ pass
+ self.chout, self.chin = popen2.popen2(COMMAND)
def is_cgi():
import os
@@ -47,28 +90,43 @@
return True
return False
-if is_cgi():
- import cgi, cgitb
- #cgitb.enable()
- #result = repr(sys.stdin.read())
- data = cgi.FieldStorage()
- data = data['text'].value
- c = SpellChecker(data)
- result = c.check()
- if result == None:
- result = ''
- print 'Content-Type: text/plain'
- print 'Content-Length: %s' % len(result)
- print
- print result
-elif __name__ == '__main__':
- while 1:
- line = raw_input('Enter text to check: ')
- if line == 'q':
- break
- c = SpellChecker(line)
- ret = c.check()
- if ret is None:
- print 'okay'
+def format_result(result):
+ """convert the result dict to XML"""
+ buf = ['<?xml version="1.0" encoding="UTF-8" ?>\n<spellcheck_result>']
+ for key, value in result.items():
+ buf.append('<incorrect><word>')
+ buf.append(key)
+ buf.append('</word><replacements>')
+ buf.append(' '.join(value))
+ buf.append('</replacements></incorrect>')
+ buf.append('</spellcheck_result>')
+ return ''.join(buf)
+
+if __name__ == '__main__':
+ if is_cgi():
+ import cgi, cgitb
+ #cgitb.enable()
+ #result = repr(sys.stdin.read())
+ data = cgi.FieldStorage()
+ data = data['text'].value
+ c = SpellChecker()
+ result = c.check(data)
+ if result == None:
+ result = ''
else:
- print ret
+ result = format_result(result)
+ print 'Content-Type: text/xml,charset=UTF-8'
+ print 'Content-Length: %s' % len(result)
+ print
+ print result
+ else:
+ c = SpellChecker()
+ while 1:
+ line = raw_input('Enter text to check: ')
+ if line == 'q':
+ break
+ ret = c.check(line)
+ if ret is None:
+ print 'okay'
+ else:
+ print ret
Modified: kupu/trunk/kupu/common/kupuspellchecker.js
==============================================================================
--- kupu/trunk/kupu/common/kupuspellchecker.js (original)
+++ kupu/trunk/kupu/common/kupuspellchecker.js Sun May 1 01:12:02 2005
@@ -31,11 +31,16 @@
KupuSpellChecker.prototype.stateChangeHandler = function(request) {
if (request.readyState == 4) {
- var result = request.responseText;
- if (!result) {
- alert('There were no errors.');
+ if (request.status == '200') {
+ var result = request.responseXML;
+ result = this.xmlToMapping(result);
+ if (!result) {
+ alert('There were no errors.');
+ } else {
+ this.displayUnrecognized(result);
+ };
} else {
- this.displayUnrecognized(result);
+ alert('Error loading data, status ' + request.status);
};
};
};
@@ -60,7 +65,7 @@
return bits.join(' ');
};
-KupuSpellChecker.prototype.displayUnrecognized = function(words) {
+KupuSpellChecker.prototype.displayUnrecognized = function(mapping) {
// copy the current editable document into a new window
var doc = this.editor.getInnerDocument().documentElement;
var win = window.open('kupublank.html', 'spellchecker',
@@ -70,12 +75,11 @@
var html = doc.innerHTML;
win.document.write('<html>' + doc.innerHTML + '</html>');
win.document.close();
- addEventHandler(win, 'load', this.continueDisplay, this, win, words);
+ win.deentitize = function(str) {return str.deentitize()};
+ addEventHandler(win, 'load', this.continueDisplay, this, win, mapping);
};
-KupuSpellChecker.prototype.continueDisplay = function(win, words) {
- words = words.split(' ').removeDoubles();
-
+KupuSpellChecker.prototype.continueDisplay = function(win, mapping) {
// walk through all elements of the body, colouring the text nodes
var body = win.document.getElementsByTagName('body')[0];
var iterator = new NodeIterator(body);
@@ -88,7 +92,7 @@
if (node.nodeType == 3) {
var span = win.document.createElement('span');
var before = node.nodeValue;
- var after = this.colourText(before, words);
+ var after = this.colourText(before, mapping);
if (before != after) {
span.innerHTML = after;
var last = span.lastChild;
@@ -104,11 +108,14 @@
};
};
-KupuSpellChecker.prototype.colourText = function(text, words) {
+KupuSpellChecker.prototype.colourText = function(text, mapping) {
var currtext = text;
var newtext = '';
- for (var i=0; i < words.length; i++) {
- var reg = new RegExp('([^\w])(' + words[i] + ')([^\w])');
+ for (var word in mapping) {
+ var replacements = mapping[word];
+ replacements = replacements.entitize();
+ replacements = replacements.replace("'", "\\'", 'g');
+ var reg = new RegExp('([^\w])(' + word + ')([^\w])');
while (true) {
var match = reg.exec(currtext);
if (!match) {
@@ -120,7 +127,10 @@
var m = match[0];
newtext += currtext.substr(0, currtext.indexOf(m));
newtext += match[1] +
- '<span style="' + this.spanstyle + '">' +
+ '<span style="' + this.spanstyle + '" ' +
+ 'onclick="alert(deentitize(\'' +
+ replacements + '\'));" ' +
+ 'title="' + replacements + '">' +
match[2] +
'</span>' +
match[3];
@@ -129,3 +139,15 @@
};
return currtext;
};
+
+KupuSpellChecker.prototype.xmlToMapping = function(docnode) {
+ var docel = docnode.documentElement;
+ var result = {};
+ var incorrect = docel.getElementsByTagName('incorrect');
+ for (var i=0; i < incorrect.length; i++) {
+ var word = incorrect[i].firstChild.firstChild.nodeValue;
+ var replacements = incorrect[i].lastChild.firstChild.nodeValue;
+ result[word] = replacements;
+ };
+ return result;
+};
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic