XML characters allowed

Di seguito riporto due metodi che controllano se un determinato carattere è valido secondo lo standard XML 1.0:

Metodo A

  /**
     * Returns a copy of the input containing only the characters permitted by
     * the {@code Char} production of the XML 1.0 standard. For reference, see
     * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
     * standard</a>.
     *
     * <p>The input is examined one Unicode code point at a time rather than one
     * UTF-16 {@code char} at a time, so a well-formed surrogate pair encoding a
     * character in the range U+10000..U+10FFFF is kept, while an unpaired
     * surrogate is removed.
     *
     * @param in The String whose non-valid characters we want to remove.
     * @return The in String, stripped of non-valid characters; an empty
     *         String if the input is null or empty.
     */
    public String stripNonValidXMLCharacters(String in) {
        if (in == null || in.length() == 0) {
            return ""; // vacancy test.
        }

        StringBuilder out = new StringBuilder(in.length());
        int index = 0;
        while (index < in.length()) {
            // codePointAt joins a high/low surrogate pair into one code point;
            // an unpaired surrogate is returned as its own value (0xD800-0xDFFF)
            // and is rejected by the range test below.
            int codePoint = in.codePointAt(index);
            index += Character.charCount(codePoint);

            boolean allowed =
                    codePoint == 0x9
                    || codePoint == 0xA
                    || codePoint == 0xD
                    || (codePoint >= 0x20 && codePoint <= 0xD7FF)
                    || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
                    || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
            if (allowed) {
                out.appendCodePoint(codePoint);
            }
        }
        return out.toString();
    }

Metodo B

/**
 * Tells whether every character of the given text is permitted by the
 * {@code Char} production of the XML 1.0 standard:
 * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}.
 *
 * <p>The check works on Unicode code points. A Java {@code String} holds
 * UTF-16 code units, not UTF-8 bytes, so a byte-oriented UTF-8 pattern cannot
 * be applied to it: it would reject ordinary non-ASCII text.
 *
 * @param xml the text to check; may be null
 * @return {@code true} if {@code xml} is null, empty, or made only of
 *         characters allowed in XML 1.0; {@code false} otherwise (including
 *         when it contains an unpaired surrogate)
 */
public static boolean iSValidXMLText(String xml) {
    if (xml == null) {
        return true; // null is treated as valid
    }

    int index = 0;
    while (index < xml.length()) {
        // A surrogate pair is read as one code point; an unpaired surrogate
        // comes back as its own value and fails the range test.
        int codePoint = xml.codePointAt(index);

        boolean allowed =
                codePoint == 0x9
                || codePoint == 0xA
                || codePoint == 0xD
                || (codePoint >= 0x20 && codePoint <= 0xD7FF)
                || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
                || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
        if (!allowed) {
            return false;
        }
        index += Character.charCount(codePoint);
    }
    return true;
}

Leave a Reply