Di seguito riporto due metodi che controllano se un determinato carattere è valido secondo lo standard XML 1.0.
Metodo A
/** * This method ensures that the output String has only * valid XML unicode characters as specified by the * XML 1.0 standard. For reference, please see * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the * standard</a>. This method will return an empty * String if the input is null or empty. * * @param in The String whose non-valid characters we want to remove. * @return The in String, stripped of non-valid characters. */ public String stripNonValidXMLCharacters(String in) { StringBuffer out = new StringBuffer(); // Used to hold the output. char current; // Used to reference the current character. if (in == null || ("".equals(in))) return ""; // vacancy test. for (int i = 0; i < in.length(); i++) { current = in.charAt(i); // NOTE: No IndexOutOfBoundsException caught here; //it should not happen. if ((current == 0x9) || (current == 0xA) || (current == 0xD) || ((current >= 0x20) && (current <= 0xD7FF)) || ((current >= 0xE000) && (current <= 0xFFFD)) || ((current >= 0x10000) && (current <= 0x10FFFF))) out.append(current); } return out.toString(); } Metodo B
/**
 * Tells whether every character of the given text is permitted by the
 * XML 1.0 {@code Char} production:
 * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}.
 *
 * <p>The text is scanned by Unicode code point rather than matched against a
 * UTF-8 byte-sequence pattern: a Java {@code String} holds UTF-16 code units,
 * so a byte-oriented pattern rejects ordinary non-ASCII text.
 *
 * @param xml the text to check; may be {@code null}
 * @return {@code true} if {@code xml} is {@code null}, empty, or made up only
 *         of XML 1.0 characters; {@code false} otherwise
 */
public static boolean iSValidXMLText(String xml) {
    if (xml == null) {
        // A null input is reported as valid.
        return true;
    }

    int index = 0;
    while (index < xml.length()) {
        // An unpaired surrogate is returned as a value in 0xD800-0xDFFF and
        // therefore fails the range checks below.
        int codePoint = xml.codePointAt(index);

        boolean allowed = codePoint == 0x9
                || codePoint == 0xA
                || codePoint == 0xD
                || (codePoint >= 0x20 && codePoint <= 0xD7FF)
                || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
                || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
        if (!allowed) {
            return false;
        }
        index += Character.charCount(codePoint);
    }
    return true;
}