summaryrefslogtreecommitdiffstats
path: root/JLanguageTool/src/java/de/danielnaber/languagetool/server/HTTPServer.java
blob: 7e1dc99dc5cd0223bb8fdb7b4557d3303466e6a2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
/* LanguageTool, a natural language style checker 
 * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package de.danielnaber.languagetool.server;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.prolixtech.jaminid.ContentOracle;
import com.prolixtech.jaminid.Daemon;
import com.prolixtech.jaminid.ProtocolResponseHeader;
import com.prolixtech.jaminid.Request;
import com.prolixtech.jaminid.Response;

import de.danielnaber.languagetool.JLanguageTool;
import de.danielnaber.languagetool.Language;
import de.danielnaber.languagetool.rules.RuleMatch;
import de.danielnaber.languagetool.tools.StringTools;

/**
 * A small embedded HTTP server that checks text. Returns XML, prints debugging
 * to stdout/stderr.
 *
 * <p>Two request types are handled by {@link #demultiplex(Request, Response)}:
 * a location ending in <tt>/Languages</tt> returns the list of supported
 * languages; everything else is treated as a text check request using the
 * <tt>language</tt>, <tt>text</tt> and optional <tt>motherTongue</tt>
 * parameters. Only clients whose address is in the local allow-list are served.
 *
 * @author Daniel Naber
 */
public class HTTPServer extends ContentOracle {

  /**
   * The default port on which the server is running (8081).
   */
  public static final int DEFAULT_PORT = 8081;

  private static final int CONTEXT_SIZE = 40; // characters

  /** HTTP status sent when the location is empty or the client address is not allowed. */
  private static final int STATUS_FORBIDDEN = 403;

  /** HTTP status sent for every other failure while handling a request. */
  private static final int STATUS_SERVER_ERROR = 500;

  /**
   * JLanguageTool instances for each language (created and configured on first use).
   * Instances are organized by language and mother language.
   * This is like a tree: first level contains the Languages, next level contains
   * JLanguageTool instances for each mother tongue (the key is <code>null</code>
   * when no mother tongue was given).
   * This map is also the lock object that serializes request handling.
   */
  private static final Map<Language, Map<Language, JLanguageTool>> instances = new HashMap<Language, Map<Language, JLanguageTool>>();

  /**
   * Client addresses (in the format returned by <code>Request.getIPAddressString()</code>)
   * that are allowed to use the server.
   */
  private static final Set<String> allowedIPs = new HashSet<String>();
  static {
    // accept only requests from localhost.
    // TODO: find a cleaner solution
    allowedIPs.add("/0:0:0:0:0:0:0:1"); // Suse Linux IPv6 stuff
    allowedIPs.add("/0:0:0:0:0:0:0:1%0"); // some(?) Mac OS X
    allowedIPs.add("/127.0.0.1");
  }

  private final int port;
  private final boolean verbose;
  /** The running jaminid daemon; <code>null</code> until {@link #run()} has been called. */
  private Daemon daemon;

  /**
   * Prepare a server on the default port - use run() to start it.
   */
  public HTTPServer() {
    this(DEFAULT_PORT, false);
  }

  /**
   * Prepare a server on the given port - use run() to start it.
   */
  public HTTPServer(int port) {
    this(port, false);
  }

  /**
   * Prepare a server on the given port - use run() to start it.
   * 
   * @param verbose
   *          if true, the text to check will be displayed in case of exceptions
   *          (default: false)
   */
  public HTTPServer(int port, boolean verbose) {
    this.port = port;
    this.verbose = verbose;
  }

  /**
   * Start the server.
   *
   * @throws PortBindingException if the daemon is not running after it was created
   */
  public void run() {
    System.out.println("Starting server on port " + port + "...");
    daemon = new Daemon(port, this);
    if (!daemon.isRunning()) {
      throw new PortBindingException(
          "LanguageTool server could not be started " + "on port " + port
              + ", maybe something else is running on that port already?");
    }
    System.out.println("Server started");
  }

  /**
   * Handle one request and return the response body.
   *
   * <p>The whole request is processed while holding the lock on the shared
   * instance map, so requests are handled one at a time.
   *
   * <p>On failure the stack trace is printed, the body is an XML-escaped
   * <tt>"Error: ..."</tt> message and the status is 403 if access was denied
   * (empty location or client address not allowed), otherwise 500.
   *
   * @param connRequest the incoming request
   * @param connResponse the response whose status and headers are set here
   * @return the XML result, or an error message
   */
  public String demultiplex(Request connRequest, Response connResponse) {
    synchronized (instances) {
      final long timeStart = System.currentTimeMillis();
      String text = null;
      // Status to report if handling fails. It is applied exactly once, in the
      // catch block, so a 403 is no longer overwritten by the generic 500.
      int errorStatus = STATUS_SERVER_ERROR;
      try {
        if (StringTools.isEmpty(connRequest.getLocation())) {
          errorStatus = STATUS_FORBIDDEN;
          throw new RuntimeException("Error: Access to "
              + connRequest.getLocation() + " denied");
        }
        if (!allowedIPs.contains(connRequest.getIPAddressString())) {
          errorStatus = STATUS_FORBIDDEN;
          throw new RuntimeException("Error: Access from "
              + connRequest.getIPAddressString() + " denied");
        }

        // TODO: temporary fix until jaminid bug is fixed (it seems that non-ASCII characters are not handled correctly)
        // see https://sourceforge.net/tracker/?func=detail&aid=2876507&group_id=127764&atid=709370
        fixRequestParamMap(connRequest);

        // return content based on request string.
        // Refactor this when the number of known request types gets too big.

        // request type: list known languages
        if (connRequest.getLocation().endsWith("/Languages")) {
          setXmlResponseHeaders(connResponse);
          return getSupportedLanguagesAsXML();
        }

        // request type: grammar checking (default type)
        final String langParam = connRequest.getParamOrNull("language");
        if (langParam == null) {
          throw new IllegalArgumentException("Missing 'language' parameter");
        }
        final Language lang = Language.getLanguageForShortName(langParam);
        if (lang == null) {
          throw new IllegalArgumentException("Unknown language '" + langParam
              + "'");
        }
        // Validate the text before looking up (and possibly creating) a
        // JLanguageTool instance, so a bad request does not trigger that work.
        text = connRequest.getParamOrNull("text");
        if (text == null) {
          throw new IllegalArgumentException("Missing 'text' parameter");
        }
        final String motherTongueParam = connRequest.getParamOrNull("motherTongue");
        Language motherTongue = null;
        if (motherTongueParam != null) {
          // NOTE(review): a null result here (presumably an unknown short name)
          // is treated exactly like a missing parameter - confirm this is intended.
          motherTongue = Language.getLanguageForShortName(motherTongueParam);
        }
        final JLanguageTool lt = getLanguageToolInstance(lang, motherTongue);
        // TODO: how to take options from the client?
        // TODO: customize lt here after reading client options
        print("Checking " + text.length() + " characters of text, language "
            + langParam);
        final List<RuleMatch> matches = lt.check(text);
        // TODO: how to set the encoding to utf-8 if we can just return a
        // String?
        setXmlResponseHeaders(connResponse);
        final String response = StringTools.ruleMatchesToXML(matches, text,
            CONTEXT_SIZE, StringTools.XmlPrintMode.NORMAL_XML);
        print("Check done in " + (System.currentTimeMillis() - timeStart)
            + "ms");
        return response;
      } catch (Exception e) {
        if (verbose) {
          print("Exceptions was caused by this text: " + text);
        }
        e.printStackTrace();
        connResponse.setStatus(errorStatus);
        // escape input to avoid XSS attacks:
        return "Error: " + StringTools.escapeXML(e.toString());
      }
    }
  }

  /**
   * Set the headers shared by all successful responses: content type
   * <tt>text/xml</tt> and content encoding <tt>UTF-8</tt>.
   */
  private static void setXmlResponseHeaders(Response connResponse) {
    connResponse.setHeaderLine(ProtocolResponseHeader.Content_Type, "text/xml");
    connResponse.setHeaderLine(ProtocolResponseHeader.Content_Encoding, "UTF-8");
  }

  /** Print a message to stdout, prefixed with the current date and time. */
  private void print(String s) {
    System.out.println(getDate() + " " + s);
  }

  /** Return the current date and time formatted with the default {@link SimpleDateFormat} pattern. */
  private String getDate() {
    // A new formatter per call: SimpleDateFormat instances must not be shared between threads.
    final SimpleDateFormat sdf = new SimpleDateFormat();
    return sdf.format(new Date());
  }

  /**
   * Stop the server process. Does nothing except logging if the server was
   * never started with {@link #run()}.
   */
  public void stop() {
    System.out.println("Stopping server...");
    if (daemon != null) {
      daemon.tearDown();
    }
    System.out.println("Server stopped");
  }

  /** Print the command line usage to stdout and terminate the JVM with exit code 1. */
  private static void printUsageAndExit() {
    System.out.println("Usage: HTTPServer [-v|--verbose] [-p|--port port]");
    System.exit(1);
  }

  /**
   * Private fix until jaminid bug is fixed (it seems that non-ASCII characters are not handled correctly) 
   * see https://sourceforge.net/tracker/?func=detail&aid=2876507&group_id=127764&atid=709370
   * Replaces the content of the request's parameter map with the parameters
   * parsed by {@link #getParamMap(Request)}.
   * 
   * @param connRequest the Request object from jaminid ContentOracle. 
   * @throws UnsupportedEncodingException If character encoding needs to be consulted, but named character encoding is not supported.
   */
  private void fixRequestParamMap(final Request connRequest) throws UnsupportedEncodingException {
    final Map<String, String> paramMap = getParamMap(connRequest);
    connRequest.getParamMap().clear();
    connRequest.getParamMap().putAll(paramMap);
  }

  /**
   * Private fix until jaminid bug is fixed (it seems that non-ASCII characters are not handled correctly) 
   * see https://sourceforge.net/tracker/?func=detail&aid=2876507&group_id=127764&atid=709370
   * Method to get the request parameters from the request string. The default implementation can't handle 
   * the UTF-8 characters (like șțîâ). We just use URLDecoder.decode() instead of the default unescape private method.
   * The request body is used if it is not empty (POST), otherwise the parameter string (GET).
   * Both parameter names and values are URL-decoded as UTF-8; a component without
   * a <tt>=</tt> (or starting with one) is stored as a name with an empty value,
   * and empty components are ignored.
   * @param connRequest the Request object from jaminid ContentOracle.
   * @return the parameters map, empty if there is no request or no parameters.
   * @throws UnsupportedEncodingException If character encoding needs to be consulted, but named character encoding is not supported
   */
  private Map<String, String> getParamMap(Request connRequest) throws UnsupportedEncodingException {
    final Map<String, String> paramMap = new HashMap<String, String>();
    if (connRequest == null) {
      return paramMap;
    }
    final String requestStr;
    if (!StringTools.isEmpty(connRequest.getBody())) {
      requestStr = connRequest.getBody(); // POST
    } else {
      requestStr = connRequest.getParamString(); // GET
    }
    if (StringTools.isEmpty(requestStr)) {
      return paramMap;
    }

    // TODO: Find some way to determine the encoding used on client-side
    // maybe "Accept-Charset" request header could be used.
    // UTF-8 will work on most platforms and browsers.
    final String encoding = "UTF-8";
    for (String comp : requestStr.split("&")) {
      if (comp.length() == 0) {
        // e.g. "a=1&&b=2" - nothing to store for the empty part
        continue;
      }
      final int equalsLoc = comp.indexOf('=');
      if (equalsLoc > 0) {
        paramMap.put(URLDecoder.decode(comp.substring(0, equalsLoc), encoding),
            URLDecoder.decode(comp.substring(equalsLoc + 1), encoding));
      } else {
        paramMap.put(URLDecoder.decode(comp, encoding), "");
      }
    }
    return paramMap;
  }

  /**
   * Find or create a JLanguageTool instance for a specific language and mother tongue.
   * The instance will be reused. If any customization is required (like disabled rules), 
   * it will be done after acquiring this instance.
   * New instances get the default pattern rules and the default false friend rules activated.
   * The cache is made of plain HashMaps; the only caller in this class,
   * {@link #demultiplex(Request, Response)}, holds the lock on it while calling.
   * 
   * @param lang the language to be used.
   * @param motherTongue the user's mother tongue or <code>null</code>
   * @return a JLanguageTool instance for a specific language and mother tongue.
   * @throws Exception when JLanguageTool creation failed
   */
  private JLanguageTool getLanguageToolInstance(Language lang, Language motherTongue) 
          throws Exception {
    Map<Language, JLanguageTool> languageTools = instances.get(lang);
    if (languageTools == null) {
      // first call using this language
      languageTools = new HashMap<Language, JLanguageTool>();
      instances.put(lang, languageTools);
    }
    JLanguageTool languageTool = languageTools.get(motherTongue);
    if (languageTool == null) {
      print("Creating JLanguageTool instance for language " + lang + ((null != motherTongue)?(" and mother tongue " + motherTongue):""));
      languageTool = new JLanguageTool(lang, motherTongue);
      languageTool.activateDefaultPatternRules();
      languageTool.activateDefaultFalseFriendRules();
      languageTools.put(motherTongue, languageTool);
    }
    return languageTool;
  }

  /**
   * Construct an xml string containing all supported languages. <br/>The xml format is:<br/>
   * &lt;languages&gt;<br/>
   *    &nbsp;&nbsp;&lt;language name="Catalan" abbr="ca" /&gt;<br/> 
   *    &nbsp;&nbsp;&lt;language name="Dutch" abbr="nl" /&gt;<br/>
   *    &nbsp;&nbsp;...<br/>
   * &lt;/languages&gt;<br/>
   * The languages are sorted by name (using <code>String.compareTo</code>).
   * The sort works on a copy, so <code>Language.REAL_LANGUAGES</code> itself is not reordered.
   * @return an xml string containing all supported languages.
   */
  public static String getSupportedLanguagesAsXML() {
    // Arrays.asList() would only wrap the shared static array, and sorting that
    // view would reorder the array for everybody else - sort a private copy.
    final Language[] languages = Language.REAL_LANGUAGES.clone();
    Arrays.sort(languages, 
      new Comparator<Language>() {
        public int compare(Language o1, Language o2) {
          return o1.getName().compareTo(o2.getName());
        }
      });
    final StringBuilder xmlBuffer = new StringBuilder("<?xml version='1.0' encoding='UTF-8'?>\n<languages>\n");
    for (Language lang : languages) {
      xmlBuffer.append(String.format("\t<language name=\"%s\" abbr=\"%s\" /> \n", lang.getName(), lang.getShortName()));
    }
    xmlBuffer.append("</languages>\n");
    return xmlBuffer.toString();
  }

  /**
   * Start the server from command line. Usage:
   * <tt>HTTPServer [-v|--verbose] [-p|--port port]</tt>
   * <br/>Prints the usage and exits if there are more than three arguments, if
   * the port option has no value, or if the port is not a number. Other
   * unknown arguments are ignored.
   */
  public static void main(String[] args) {
    if (args.length > 3) {
      printUsageAndExit();
      return;
    }
    boolean verbose = false;
    int port = DEFAULT_PORT;
    for (int i = 0; i < args.length; i++) {
      final String arg = args[i];
      if ("-p".equals(arg) || "--port".equals(arg)) {
        if (i + 1 >= args.length) {
          // port option given without a value
          printUsageAndExit();
          return;
        }
        try {
          port = Integer.parseInt(args[++i]);
        } catch (NumberFormatException e) {
          printUsageAndExit();
          return;
        }
      } else if ("-v".equals(arg) || "--verbose".equals(arg)) {
        verbose = true;
      }
    }
    final HTTPServer server = new HTTPServer(port, verbose);
    server.run();
  }

}