/* LanguageTool, a natural language style checker
 * Copyright (C) 2007 Daniel Naber (http://www.danielnaber.de)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
package de.danielnaber.languagetool;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

import de.danielnaber.languagetool.tools.StringTools;

/**
 * Validates XML either against a DTD (XML passed as a string or loaded as a
 * classpath resource) or against an XML Schema (XML loaded as a classpath
 * resource). All validation methods report problems by throwing an exception.
 *
 * @author Daniel Naber
 */
public final class XMLValidator {

  /**
   * Creates a new validator.
   */
  public XMLValidator() {
  }

  /**
   * Check some limits of our simplified XML output.
   *
   * @param xml the XML text to check
   * @throws IOException if one of the checks fails
*/ public void checkSimpleXMLString(String xml) throws IOException { Pattern p = Pattern.compile("()", Pattern.DOTALL|Pattern.MULTILINE); Matcher matcher = p.matcher(xml); int pos = 0; while (matcher.find(pos)) { String errorElement = matcher.group(); pos = matcher.end(); if (errorElement.contains("\n") || errorElement.contains("\r")) throw new IOException(" may not contain line breaks"); char beforeError = xml.charAt(matcher.start()-1); if (beforeError != '\n' && beforeError != '\r') throw new IOException("Each must start on a new line"); } } /** * Validate XML with the given DTD. Throws exception on error. */ public void validateXMLString(String xml, String dtdFile, String docType) throws SAXException, IOException, ParserConfigurationException { validateInternal(xml, dtdFile, docType); } /** * Validate XML file with the given DTD. Throws exception on error. */ public final void validate(String filename, String dtdFile, String docType) throws IOException { try { String xml = StringTools.readFile(this.getClass().getResourceAsStream(filename), "utf-8"); validateInternal(xml, dtdFile, docType); } catch (Exception e) { IOException ioe = new IOException("Cannot load or parse '"+filename+"'"); ioe.initCause(e); throw ioe; } } /** * Validate XML file using the given XSD. Throws an exception on error * @param filename File to validate. * @param xmlSchema Schema to use. * @throws IOException Thrown on error. 
*/ public final void validate(String filename, String xmlSchema) throws IOException { try { validateInternal(this.getClass().getResourceAsStream(filename), this.getClass().getResource(xmlSchema)); } catch (Exception e) { IOException ioe = new IOException("Cannot load or parse '"+filename+"'"); ioe.initCause(e); throw ioe; } } private void validateInternal(String xml, String dtdFile, String doctype) throws SAXException, IOException, ParserConfigurationException { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setValidating(true); SAXParser saxParser = factory.newSAXParser(); //used for removing existing DOCTYPE from grammar.xml files xml = xml.replaceAll("", ""); final String decl = ""; final String dtd = ""; int pos = xml.indexOf(decl); int endPos = xml.indexOf(endDecl); if (pos == -1) throw new IOException("No XML declaration found in '" + xml.substring(0, Math.min(100, xml.length())) + "...'"); String newXML = xml.substring(0, endPos+endDecl.length()) + "\r\n" + dtd + xml.substring(endPos+endDecl.length()); //System.err.println(newXML); InputSource is = new InputSource(new StringReader(newXML)); saxParser.parse(is, new ErrorHandler()); } private void validateInternal(InputStream xml, URL xmlSchema) throws SAXException, IOException, ParserConfigurationException { SchemaFactory sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); Schema schema = sf.newSchema(xmlSchema); Validator validator = schema.newValidator(); validator.setErrorHandler(new ErrorHandler()); validator.validate(new StreamSource(xml)); } } /** * XML handler that throws exception on error and warning, does nothing otherwise. 
*/
class ErrorHandler extends DefaultHandler {

  /**
   * Reports the warning on standard error and rethrows it, so that a warning
   * aborts parsing just like an error does.
   *
   * @param e the warning raised by the parser
   * @throws SAXException always; the very exception that was passed in
   */
  @Override
  public void warning(SAXParseException e) throws SAXException {
    throw reported(e);
  }

  /**
   * Reports the error on standard error and rethrows it.
   *
   * @param e the error raised by the parser
   * @throws SAXException always; the very exception that was passed in
   */
  @Override
  public void error(SAXParseException e) throws SAXException {
    throw reported(e);
  }

  /**
   * Prints the problem's message and position to standard error and hands
   * the same exception back so the caller can throw it.
   */
  private static SAXParseException reported(SAXParseException problem) {
    final StringBuilder text = new StringBuilder();
    text.append(problem.getMessage());
    text.append(" Problem found at line ").append(problem.getLineNumber());
    text.append(", column ").append(problem.getColumnNumber());
    text.append(".");
    System.err.println(text.toString());
    return problem;
  }

}