#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
#
# LanguageTool -- A Rule-Based Style and Grammar Checker
# Copyright (C) 2002,2003,2004 Daniel Naber
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""CGI front end for the LanguageTool text checker (Python 2).

Depending on the request parameters the script either
  * checks the submitted ``text`` and prints it with the rule matches marked,
  * explains a part-of-speech tag (``explain`` parameter), or
  * prints the input form.

NOTE(review): the HTML markup in the string literals below was reconstructed
(the available copy of this file had all tags stripped).  Field names are
taken from the form.getvalue() calls; the visible text is unchanged.  Compare
with the version-controlled original before deploying.
"""

import cgi
import string
import sys
import os

# The checker modules live next to this script (and in ./src), and they are
# run with the script directory as working directory.
os.chdir(sys.path[0])
sys.path.append(sys.path[0])
sys.path.append(os.path.join(sys.path[0], "src"))
import TagInfo
import TextChecker

# FIXME: for debugging only (cgitb shows full tracebacks to the web client)
import cgitb
cgitb.enable()

# Markup wrapped around the text range a rule match refers to.
_ERROR_OPEN = '<span class="error" style="color: red">'
_ERROR_CLOSE = '</span>'


def main():
    """Dispatch the CGI request to the check, explanation or form page."""
    form = cgi.FieldStorage()
    if form.getvalue("text"):
        check(form)
    elif form.getvalue("explain"):
        displayExplanation(form)
    else:
        displayForm(form)
    return


def displayExplanation(form):
    """Print an HTML page explaining the tag given in the ``explain`` field.

    The ``lang`` field is handed to TagInfo.TagInfo() to select the tag set.
    """
    print("Content-Type: text/html\n\n")
    tag = cgi.escape(form.getvalue("explain"))      # security: anti XSS
    taginfo = TagInfo.TagInfo(form.getvalue("lang"))
    # NOTE(review): the explanation is printed as returned by TagInfo.getExp();
    # confirm it can never echo request data back unescaped.
    explanation = taginfo.getExp(form.getvalue("explain"))
    print("""<html>
<head>
<title>LanguageTool: Tag explanation for %s</title>
</head>
<body>

<h1>Tag explanation for %s</h1>

<p>%s: %s</p>

</body>
</html>""" % (tag, tag, tag, explanation))
    return


def displayForm(form):
    """Print the HTML input form.

    ``form`` is unused; it is accepted so all page functions share one
    signature.
    """
    print("Content-Type: text/html\n\n")
    # NOTE(review): which words of the sample sentences were marked red and
    # the homepage URL are reconstructed -- confirm against the original.
    print("""<html>
<head>
<title>LanguageTool Web Interface</title>
</head>
<body>

<h1>LanguageTool Web Interface</h1>

<form action="" method="post">
<p>Enter text here:</p>

<p><textarea name="text" rows="8" cols="70"></textarea></p>

<p>
<input type="checkbox" name="tags" /> Show part-of-speech tags
<input type="checkbox" name="german_ff" /> Check for false friends (for German native speakers)<br />
<input type="checkbox" name="style" /> Check for some style issues (e.g. <em>don't</em> instead of <em>do not</em>)<br />
<input type="checkbox" name="sentencelength" /> Complain about long sentences (more than 30 words)<br />
</p>

<p><input type="submit" value="Check text" /></p>
</form>

<h2>Suggested test text</h2>
<p>The incorrect words are red (copy and paste into text field to try it):</p>

<p>
Then he <span style="color: red">look</span> at the building.<br />
I definitely think <span style="color: red">is</span> should be less than four years.<br />
This allows to provide a powerful <span style="color: red">a</span> help system.<br />
His house is as big <span style="color: red">like</span> mine.<br />
His car is larger <span style="color: red">then</span> mine.<br />
</p>

<hr />
<p><a href="https://languagetool.org/">LanguageTool Homepage</a></p>
</body>
</html>""")
    return


def _tokens_to_html(tags, show_tags, textlanguage):
    """Turn the checker's token triples into a flat list of HTML fragments.

    For every triple two entries are appended: the HTML-escaped token text
    (triple[0]) and a markup fragment (possibly empty) holding the optional
    tag-explanation link for triple[2] and a line break after 'SENT_END'.
    Markup fragments always start with '<' or are empty; escaped token text
    never starts with '<'.
    """
    fragments = []
    # security: 'lang' comes from the request and ends up inside an attribute
    lang_attr = cgi.escape(textlanguage, True)
    for tag_triple in tags:
        tag_str = ""
        if show_tags and tag_triple[2]:
            w = cgi.escape(tag_triple[2], True)
            tag_str = '<a href="?explain=%s&amp;lang=%s">[%s]</a>' \
                % (w, lang_attr, w)
        if tag_triple[2] == 'SENT_END':
            tag_str = '%s<br />\n' % tag_str
        fragments.append(cgi.escape(tag_triple[0]))
        fragments.append(tag_str)
    return fragments


def _annotate_matches(text_list, rule_matches):
    """Wrap the text covered by each rule match in error markup, in place.

    ``rule_matches`` must be ordered from the last match to the first so
    that markup already inserted lies behind the range still being located.
    Positions are counted over the non-markup fragments only.
    """
    for rule_match in rule_matches:
        # TODO: this produces invalid code if the rule ranges are nested!
        ct = 0
        start_found = 0
        end_found = 0
        for i, el in enumerate(text_list):
            if el.startswith("<"):
                # markup fragment: not part of the checked text
                continue
            from_pos = ct
            to_pos = ct + len(el)
            # Start and end are tested independently so that a match lying
            # completely inside one fragment gets both its opening and its
            # closing markup.
            if rule_match.from_pos <= to_pos and \
                    rule_match.from_pos >= from_pos and not start_found:
                text_list[i] = '%s%s' % (_ERROR_OPEN, text_list[i])
                start_found = 1
            if rule_match.to_pos <= to_pos and \
                    rule_match.to_pos >= from_pos and not end_found:
                # NOTE(review): message and id are inserted unescaped, as
                # before; rule messages may contain intentional markup.
                text_list[i] = '%s%s<span class="message">[%s (%s)]</span>' \
                    % (text_list[i], _ERROR_CLOSE,
                       rule_match.message, rule_match.id)
                end_found = 1
            ct = to_pos     # length of the fragment *before* markup was added
            if end_found and start_found:
                break


def check(form):
    """Check the submitted ``text`` field and print the annotated result page.

    Request fields read: text, style, german_ff, lang, sentencelength, tags.
    """
    print("Content-Type: text/html\n\n")
    text = form.getvalue("text").decode('latin1')
    if not text:
        text = ""

    # TODO: put options for all these in the web page? too confusing...
    grammar = None
    falsefriends = None
    words = None
    builtin = None
    textlanguage = "en"
    if not form.getvalue("style"):
        # presumably selects no word/style rules at all -- see TextChecker
        words = ["__NONE"]
    mothertongue = "de"
    if not form.getvalue("german_ff"):
        mothertongue = None
    if form.getvalue("lang"):
        textlanguage = form.getvalue("lang")
    # presumably 0 disables the sentence length check and None selects the
    # checker's default limit -- verify against TextChecker
    max_sentence_length = 0
    if form.getvalue("sentencelength"):
        max_sentence_length = None
    checker = TextChecker.TextChecker(grammar, falsefriends, words, builtin,
        textlanguage, mothertongue, max_sentence_length)

    print("""<html>
<head>
<title>Check result</title>
</head>
<body>

<h1>Result</h1>

""")
    (rule_matches, res, tags) = checker.check(text)
    # TODO: add an option to print the complete checker XML response
    # (cgi.escape(res)) and the raw tag list (tags).

    text_list = _tokens_to_html(tags, form.getvalue("tags"), textlanguage)

    # guarantee that the rule_matches are ordered by their position:
    rule_matches.sort()
    rule_matches.reverse()
    # add messages from end of list to avoid count confusion
    _annotate_matches(text_list, rule_matches)

    text = "".join(text_list)
    # Characters outside Latin-1 (e.g. in rule messages) become numeric
    # character references instead of raising UnicodeEncodeError.
    print(text.encode('latin1', 'xmlcharrefreplace'))

    if len(rule_matches) == 1:
        print("\n\n<p>%d possible error found.</p>\n\n" % len(rule_matches))
    else:
        print("\n\n<p>%d possible errors found.</p>\n\n" % len(rule_matches))
    print("</body>\n</html>")
    return


main()