/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.output;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.input.XmlStreamReader;

/**
 * Character stream that handles all the necessary Voodoo to figure out the
 * charset encoding of the XML document written to the stream.
 * <p>
 * Characters written to this writer are first collected in an in-memory
 * buffer (at most {@code BUFFER_SIZE} characters). Once the XML declaration
 * has been seen completely, or it is clear that there is none, the encoding
 * is chosen and all buffered and subsequent characters are forwarded to an
 * {@link OutputStreamWriter} using that encoding.
 * </p>
 *
 * @see XmlStreamReader
 * @since 2.0
 */
public class XmlStreamWriter extends Writer {
    /** Maximum number of characters buffered while looking for the XML declaration. */
    private static final int BUFFER_SIZE = 4096;

    /** The byte stream that finally receives the encoded document. */
    private final OutputStream out;

    /** Encoding used when the document does not declare one; never {@code null}. */
    private final String defaultEncoding;

    /**
     * Buffer for the first characters of the document; set to {@code null}
     * once the encoding has been chosen and {@link #writer} has been created.
     */
    private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);

    /** The encoding writer; {@code null} until the encoding has been chosen. */
    private Writer writer;

    /** The chosen encoding; {@code null} until detection has completed. */
    private String encoding;

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with a default encoding of UTF-8.
     *
     * @param out The output stream
     */
    public XmlStreamWriter(final OutputStream out) {
        this(out, null);
    }

    /**
     * Constructs a new XML stream writer for the specified output stream
     * with the specified default encoding.
     *
     * @param out The output stream
     * @param defaultEncoding The default encoding if no encoding could be
     *        detected; {@code null} selects UTF-8
     */
    public XmlStreamWriter(final OutputStream out, final String defaultEncoding) {
        this.out = out;
        this.defaultEncoding = defaultEncoding != null ? defaultEncoding : "UTF-8";
    }

    /**
     * Constructs a new XML stream writer for the specified file
     * with a default encoding of UTF-8.
     *
     * @param file The file to write to
     * @throws FileNotFoundException if there is an error creating or
     *         opening the file
     */
    public XmlStreamWriter(final File file) throws FileNotFoundException {
        this(file, null);
    }

    /**
     * Constructs a new XML stream writer for the specified file
     * with the specified default encoding.
     *
     * @param file The file to write to
     * @param defaultEncoding The default encoding if no encoding could be
     *        detected; {@code null} selects UTF-8
     * @throws FileNotFoundException if there is an error creating or
     *         opening the file
     */
    public XmlStreamWriter(final File file, final String defaultEncoding) throws FileNotFoundException {
        this(new FileOutputStream(file), defaultEncoding);
    }

    /**
     * Returns the detected encoding.
     *
     * @return the detected encoding, or {@code null} if no encoding has been
     *         chosen yet
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Returns the default encoding.
     *
     * @return the default encoding
     */
    public String getDefaultEncoding() {
        return defaultEncoding;
    }

    /**
     * Closes the underlying writer.
     * <p>
     * If no encoding has been chosen yet, the default encoding is used and
     * the characters buffered so far are written before closing.
     * </p>
     *
     * @throws IOException if an error occurs closing the underlying writer
     */
    @Override
    public void close() throws IOException {
        if (writer == null) {
            // Detection never completed: fall back to the default encoding
            // and emit whatever has been buffered so far.
            encoding = defaultEncoding;
            writer = new OutputStreamWriter(out, encoding);
            writer.write(xmlPrologWriter.toString());
        }
        writer.close();
    }

    /**
     * Flushes the underlying writer.
     * <p>
     * Does nothing while the encoding is still undetermined, because the
     * buffered characters cannot be encoded yet.
     * </p>
     *
     * @throws IOException if an error occurs flushing the underlying writer
     */
    @Override
    public void flush() throws IOException {
        if (writer != null) {
            writer.flush();
        }
    }

    /**
     * Detects the encoding.
     * <p>
     * Appends the given characters to the prolog buffer (up to
     * {@code BUFFER_SIZE} characters in total) and inspects the buffer. As
     * soon as an encoding can be chosen, the encoding writer is created and
     * receives the buffered characters followed by any characters of this
     * call that did not fit into the buffer.
     * </p>
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     */
    private void detectEncoding(final char[] cbuf, final int off, final int len)
            throws IOException {
        // Only buffer as many characters as still fit below BUFFER_SIZE.
        int size = len;
        final StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
        if (xmlProlog.length() + len > BUFFER_SIZE) {
            size = BUFFER_SIZE - xmlProlog.length();
        }
        xmlPrologWriter.write(cbuf, off, size);

        // try to determine encoding
        if (xmlProlog.length() >= 5) {
            if (xmlProlog.substring(0, 5).equals("<?xml")) {
                // try to extract encoding from XML prolog
                final int xmlPrologEnd = xmlProlog.indexOf("?>");
                if (xmlPrologEnd > 0) {
                    // ok, full XML prolog written: let's extract encoding
                    final Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0,
                            xmlPrologEnd));
                    if (m.find()) {
                        // Locale.ROOT keeps the upper-casing independent of the
                        // default locale (e.g. Turkish 'i' -> dotted capital I
                        // would produce an invalid charset name).
                        encoding = m.group(1).toUpperCase(Locale.ROOT);
                        // Drop the first and last character of the match
                        // (presumably the quotes around the encoding name;
                        // the pattern is defined in XmlStreamReader).
                        encoding = encoding.substring(1, encoding.length() - 1);
                    } else {
                        // no encoding found in XML prolog: using default
                        // encoding
                        encoding = defaultEncoding;
                    }
                } else {
                    if (xmlProlog.length() >= BUFFER_SIZE) {
                        // no encoding found in first characters: using default
                        // encoding
                        encoding = defaultEncoding;
                    }
                }
            } else {
                // no XML prolog: using default encoding
                encoding = defaultEncoding;
            }
            if (encoding != null) {
                // encoding has been chosen: let's do it
                // Create the writer BEFORE releasing the prolog buffer: the
                // constructor throws for an unsupported encoding name, and in
                // that case this object must stay usable so that close() can
                // still close the underlying stream instead of failing with a
                // NullPointerException.
                writer = new OutputStreamWriter(out, encoding);
                xmlPrologWriter = null;
                writer.write(xmlProlog.toString());
                if (len > size) {
                    // forward the characters that did not fit into the buffer
                    writer.write(cbuf, off + size, len - size);
                }
            }
        }
    }

    /**
     * Writes the characters to the underlying writer, detecting encoding.
     *
     * @param cbuf the buffer to write the characters from
     * @param off The start offset
     * @param len The number of characters to write
     * @throws IOException if an error occurs detecting the encoding
     */
    @Override
    public void write(final char[] cbuf, final int off, final int len) throws IOException {
        if (xmlPrologWriter != null) {
            // encoding not chosen yet: keep buffering / detecting
            detectEncoding(cbuf, off, len);
        } else {
            writer.write(cbuf, off, len);
        }
    }

    /** Pattern locating the encoding in the XML prolog; shared with {@link XmlStreamReader}. */
    static final Pattern ENCODING_PATTERN = XmlStreamReader.ENCODING_PATTERN;
}