|
|
@ -205,112 +205,112 @@ public final class HTMLFilterUtil { |
|
|
|
return s; |
|
|
|
return s; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
public boolean isAlwaysMakeTags() { |
|
|
|
public boolean isAlwaysMakeTags() { |
|
|
|
return alwaysMakeTags; |
|
|
|
return alwaysMakeTags; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
public boolean isStripComments() { |
|
|
|
public boolean isStripComments() { |
|
|
|
return stripComment; |
|
|
|
return stripComment; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String escapeComments(final String s) { |
|
|
|
private String escapeComments(final String s) { |
|
|
|
final Matcher m = P_COMMENTS.matcher(s); |
|
|
|
final Matcher m = P_COMMENTS.matcher(s); |
|
|
|
final StringBuffer buf = new StringBuffer(); |
|
|
|
final StringBuffer buf = new StringBuffer(); |
|
|
|
if (m.find()) { |
|
|
|
if (m.find()) { |
|
|
|
final String match = m.group(1); // (.*?)
|
|
|
|
final String match = m.group(1); // (.*?)
|
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement("<!--" + htmlSpecialChars(match) + "-->")); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement("<!--" + htmlSpecialChars(match) + "-->")); |
|
|
|
} |
|
|
|
} |
|
|
|
m.appendTail(buf); |
|
|
|
m.appendTail(buf); |
|
|
|
|
|
|
|
|
|
|
|
return buf.toString(); |
|
|
|
return buf.toString(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String balanceHTML(String s) { |
|
|
|
private String balanceHTML(String s) { |
|
|
|
if (alwaysMakeTags) { |
|
|
|
if (alwaysMakeTags) { |
|
|
|
//
|
|
|
|
//
|
|
|
|
// try and form html
|
|
|
|
// try and form html
|
|
|
|
//
|
|
|
|
//
|
|
|
|
s = regexReplace(P_END_ARROW, "", s); |
|
|
|
s = regexReplace(P_END_ARROW, "", s); |
|
|
|
s = regexReplace(P_BODY_TO_END, "<$1>", s); |
|
|
|
s = regexReplace(P_BODY_TO_END, "<$1>", s); |
|
|
|
s = regexReplace(P_XML_CONTENT, "$1<$2", s); |
|
|
|
s = regexReplace(P_XML_CONTENT, "$1<$2", s); |
|
|
|
|
|
|
|
|
|
|
|
} else { |
|
|
|
} else { |
|
|
|
//
|
|
|
|
//
|
|
|
|
// escape stray brackets
|
|
|
|
// escape stray brackets
|
|
|
|
//
|
|
|
|
//
|
|
|
|
s = regexReplace(P_STRAY_LEFT_ARROW, "<$1", s); |
|
|
|
s = regexReplace(P_STRAY_LEFT_ARROW, "<$1", s); |
|
|
|
s = regexReplace(P_STRAY_RIGHT_ARROW, "$1$2><", s); |
|
|
|
s = regexReplace(P_STRAY_RIGHT_ARROW, "$1$2><", s); |
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// the last regexp causes '<>' entities to appear
|
|
|
|
// the last regexp causes '<>' entities to appear
|
|
|
|
// (we need to do a lookahead assertion so that the last bracket can
|
|
|
|
// (we need to do a lookahead assertion so that the last bracket can
|
|
|
|
// be used in the next pass of the regexp)
|
|
|
|
// be used in the next pass of the regexp)
|
|
|
|
//
|
|
|
|
//
|
|
|
|
s = regexReplace(P_BOTH_ARROWS, "", s); |
|
|
|
s = regexReplace(P_BOTH_ARROWS, "", s); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return s; |
|
|
|
return s; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String checkTags(String s) { |
|
|
|
private String checkTags(String s) { |
|
|
|
Matcher m = P_TAGS.matcher(s); |
|
|
|
Matcher m = P_TAGS.matcher(s); |
|
|
|
|
|
|
|
|
|
|
|
final StringBuffer buf = new StringBuffer(); |
|
|
|
final StringBuffer buf = new StringBuffer(); |
|
|
|
while (m.find()) { |
|
|
|
while (m.find()) { |
|
|
|
String replaceStr = m.group(1); |
|
|
|
String replaceStr = m.group(1); |
|
|
|
replaceStr = processTag(replaceStr); |
|
|
|
replaceStr = processTag(replaceStr); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(replaceStr)); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(replaceStr)); |
|
|
|
} |
|
|
|
} |
|
|
|
m.appendTail(buf); |
|
|
|
m.appendTail(buf); |
|
|
|
|
|
|
|
|
|
|
|
// these get tallied in processTag
|
|
|
|
// these get tallied in processTag
|
|
|
|
// (remember to reset before subsequent calls to filter method)
|
|
|
|
// (remember to reset before subsequent calls to filter method)
|
|
|
|
final StringBuilder sBuilder = new StringBuilder(buf.toString()); |
|
|
|
final StringBuilder sBuilder = new StringBuilder(buf.toString()); |
|
|
|
for (String key : vTagCounts.keySet()) { |
|
|
|
for (String key : vTagCounts.keySet()) { |
|
|
|
for (int ii = 0; ii < vTagCounts.get(key); ii++) { |
|
|
|
for (int ii = 0; ii < vTagCounts.get(key); ii++) { |
|
|
|
sBuilder.append("</").append(key).append(">"); |
|
|
|
sBuilder.append("</").append(key).append(">"); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
s = sBuilder.toString(); |
|
|
|
s = sBuilder.toString(); |
|
|
|
|
|
|
|
|
|
|
|
return s; |
|
|
|
return s; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String processRemoveBlanks(final String s) { |
|
|
|
private String processRemoveBlanks(final String s) { |
|
|
|
String result = s; |
|
|
|
String result = s; |
|
|
|
for (String tag : vRemoveBlanks) { |
|
|
|
for (String tag : vRemoveBlanks) { |
|
|
|
if (!P_REMOVE_PAIR_BLANKS.containsKey(tag)) { |
|
|
|
if (!P_REMOVE_PAIR_BLANKS.containsKey(tag)) { |
|
|
|
P_REMOVE_PAIR_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?></" + tag + ">")); |
|
|
|
P_REMOVE_PAIR_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?></" + tag + ">")); |
|
|
|
} |
|
|
|
} |
|
|
|
result = regexReplace(P_REMOVE_PAIR_BLANKS.get(tag), "", result); |
|
|
|
result = regexReplace(P_REMOVE_PAIR_BLANKS.get(tag), "", result); |
|
|
|
if (!P_REMOVE_SELF_BLANKS.containsKey(tag)) { |
|
|
|
if (!P_REMOVE_SELF_BLANKS.containsKey(tag)) { |
|
|
|
P_REMOVE_SELF_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?/>")); |
|
|
|
P_REMOVE_SELF_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?/>")); |
|
|
|
} |
|
|
|
} |
|
|
|
result = regexReplace(P_REMOVE_SELF_BLANKS.get(tag), "", result); |
|
|
|
result = regexReplace(P_REMOVE_SELF_BLANKS.get(tag), "", result); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return result; |
|
|
|
return result; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private static String regexReplace(final Pattern regex_pattern, final String replacement, final String s) { |
|
|
|
private static String regexReplace(final Pattern regex_pattern, final String replacement, final String s) { |
|
|
|
Matcher m = regex_pattern.matcher(s); |
|
|
|
Matcher m = regex_pattern.matcher(s); |
|
|
|
return m.replaceAll(replacement); |
|
|
|
return m.replaceAll(replacement); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String processTag(final String s) { |
|
|
|
private String processTag(final String s) { |
|
|
|
// ending tags
|
|
|
|
// ending tags
|
|
|
|
Matcher m = P_END_TAG.matcher(s); |
|
|
|
Matcher m = P_END_TAG.matcher(s); |
|
|
|
if (m.find()) { |
|
|
|
if (m.find()) { |
|
|
|
final String name = m.group(1).toLowerCase(); |
|
|
|
final String name = m.group(1).toLowerCase(); |
|
|
|
if (allowed(name)) { |
|
|
|
if (allowed(name)) { |
|
|
|
if (false == inArray(name, vSelfClosingTags)) { |
|
|
|
if (false == inArray(name, vSelfClosingTags)) { |
|
|
|
if (vTagCounts.containsKey(name)) { |
|
|
|
if (vTagCounts.containsKey(name)) { |
|
|
|
vTagCounts.put(name, vTagCounts.get(name) - 1); |
|
|
|
vTagCounts.put(name, vTagCounts.get(name) - 1); |
|
|
|
return "</" + name + ">"; |
|
|
|
return "</" + name + ">"; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// starting tags
|
|
|
|
// starting tags
|
|
|
|
m = P_START_TAG.matcher(s); |
|
|
|
m = P_START_TAG.matcher(s); |
|
|
@ -353,105 +353,105 @@ public final class HTMLFilterUtil { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (inArray(name, vSelfClosingTags)) { |
|
|
|
if (inArray(name, vSelfClosingTags)) { |
|
|
|
ending = " /"; |
|
|
|
ending = " /"; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (inArray(name, vNeedClosingTags)) { |
|
|
|
if (inArray(name, vNeedClosingTags)) { |
|
|
|
ending = ""; |
|
|
|
ending = ""; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (ending == null || ending.length() < 1) { |
|
|
|
if (ending == null || ending.length() < 1) { |
|
|
|
if (vTagCounts.containsKey(name)) { |
|
|
|
if (vTagCounts.containsKey(name)) { |
|
|
|
vTagCounts.put(name, vTagCounts.get(name) + 1); |
|
|
|
vTagCounts.put(name, vTagCounts.get(name) + 1); |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
vTagCounts.put(name, 1); |
|
|
|
vTagCounts.put(name, 1); |
|
|
|
} |
|
|
|
} |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
ending = " /"; |
|
|
|
ending = " /"; |
|
|
|
} |
|
|
|
} |
|
|
|
return "<" + name + params + ending + ">"; |
|
|
|
return "<" + name + params + ending + ">"; |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
return ""; |
|
|
|
return ""; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// comments
|
|
|
|
// comments
|
|
|
|
m = P_COMMENT.matcher(s); |
|
|
|
m = P_COMMENT.matcher(s); |
|
|
|
if (!stripComment && m.find()) { |
|
|
|
if (!stripComment && m.find()) { |
|
|
|
return "<" + m.group() + ">"; |
|
|
|
return "<" + m.group() + ">"; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return ""; |
|
|
|
return ""; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String processParamProtocol(String s) { |
|
|
|
private String processParamProtocol(String s) { |
|
|
|
s = decodeEntities(s); |
|
|
|
s = decodeEntities(s); |
|
|
|
final Matcher m = P_PROTOCOL.matcher(s); |
|
|
|
final Matcher m = P_PROTOCOL.matcher(s); |
|
|
|
if (m.find()) { |
|
|
|
if (m.find()) { |
|
|
|
final String protocol = m.group(1); |
|
|
|
final String protocol = m.group(1); |
|
|
|
if (!inArray(protocol, vAllowedProtocols)) { |
|
|
|
if (!inArray(protocol, vAllowedProtocols)) { |
|
|
|
// bad protocol, turn into local anchor link instead
|
|
|
|
// bad protocol, turn into local anchor link instead
|
|
|
|
s = "#" + s.substring(protocol.length() + 1); |
|
|
|
s = "#" + s.substring(protocol.length() + 1); |
|
|
|
if (s.startsWith("#//")) { |
|
|
|
if (s.startsWith("#//")) { |
|
|
|
s = "#" + s.substring(3); |
|
|
|
s = "#" + s.substring(3); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return s; |
|
|
|
return s; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String decodeEntities(String s) { |
|
|
|
private String decodeEntities(String s) { |
|
|
|
StringBuffer buf = new StringBuffer(); |
|
|
|
StringBuffer buf = new StringBuffer(); |
|
|
|
|
|
|
|
|
|
|
|
Matcher m = P_ENTITY.matcher(s); |
|
|
|
Matcher m = P_ENTITY.matcher(s); |
|
|
|
while (m.find()) { |
|
|
|
while (m.find()) { |
|
|
|
final String match = m.group(1); |
|
|
|
final String match = m.group(1); |
|
|
|
final int decimal = Integer.decode(match).intValue(); |
|
|
|
final int decimal = Integer.decode(match); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal))); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal))); |
|
|
|
} |
|
|
|
} |
|
|
|
m.appendTail(buf); |
|
|
|
m.appendTail(buf); |
|
|
|
s = buf.toString(); |
|
|
|
s = buf.toString(); |
|
|
|
|
|
|
|
|
|
|
|
buf = new StringBuffer(); |
|
|
|
buf = new StringBuffer(); |
|
|
|
m = P_ENTITY_UNICODE.matcher(s); |
|
|
|
m = P_ENTITY_UNICODE.matcher(s); |
|
|
|
while (m.find()) { |
|
|
|
while (m.find()) { |
|
|
|
final String match = m.group(1); |
|
|
|
final String match = m.group(1); |
|
|
|
final int decimal = Integer.valueOf(match, 16).intValue(); |
|
|
|
final int decimal = Integer.parseInt(match, 16); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal))); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal))); |
|
|
|
} |
|
|
|
} |
|
|
|
m.appendTail(buf); |
|
|
|
m.appendTail(buf); |
|
|
|
s = buf.toString(); |
|
|
|
s = buf.toString(); |
|
|
|
|
|
|
|
|
|
|
|
buf = new StringBuffer(); |
|
|
|
buf = new StringBuffer(); |
|
|
|
m = P_ENCODE.matcher(s); |
|
|
|
m = P_ENCODE.matcher(s); |
|
|
|
while (m.find()) { |
|
|
|
while (m.find()) { |
|
|
|
final String match = m.group(1); |
|
|
|
final String match = m.group(1); |
|
|
|
final int decimal = Integer.valueOf(match, 16).intValue(); |
|
|
|
final int decimal = Integer.parseInt(match, 16); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal))); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(chr(decimal))); |
|
|
|
} |
|
|
|
} |
|
|
|
m.appendTail(buf); |
|
|
|
m.appendTail(buf); |
|
|
|
s = buf.toString(); |
|
|
|
s = buf.toString(); |
|
|
|
|
|
|
|
|
|
|
|
s = validateEntities(s); |
|
|
|
s = validateEntities(s); |
|
|
|
return s; |
|
|
|
return s; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String validateEntities(final String s) { |
|
|
|
private String validateEntities(final String s) { |
|
|
|
StringBuffer buf = new StringBuffer(); |
|
|
|
StringBuffer buf = new StringBuffer(); |
|
|
|
|
|
|
|
|
|
|
|
// validate entities throughout the string
|
|
|
|
// validate entities throughout the string
|
|
|
|
Matcher m = P_VALID_ENTITIES.matcher(s); |
|
|
|
Matcher m = P_VALID_ENTITIES.matcher(s); |
|
|
|
while (m.find()) { |
|
|
|
while (m.find()) { |
|
|
|
final String one = m.group(1); // ([^&;]*)
|
|
|
|
final String one = m.group(1); // ([^&;]*)
|
|
|
|
final String two = m.group(2); // (?=(;|&|$))
|
|
|
|
final String two = m.group(2); // (?=(;|&|$))
|
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(checkEntity(one, two))); |
|
|
|
m.appendReplacement(buf, Matcher.quoteReplacement(checkEntity(one, two))); |
|
|
|
} |
|
|
|
} |
|
|
|
m.appendTail(buf); |
|
|
|
m.appendTail(buf); |
|
|
|
|
|
|
|
|
|
|
|
return encodeQuotes(buf.toString()); |
|
|
|
return encodeQuotes(buf.toString()); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String encodeQuotes(final String s) { |
|
|
|
private String encodeQuotes(final String s) { |
|
|
|
if (encodeQuotes) { |
|
|
|
if (encodeQuotes) { |
|
|
@ -471,29 +471,29 @@ public final class HTMLFilterUtil { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private String checkEntity(final String preamble, final String term) { |
|
|
|
private String checkEntity(final String preamble, final String term) { |
|
|
|
|
|
|
|
|
|
|
|
return ";".equals(term) && isValidEntity(preamble) ? '&' + preamble : "&" + preamble; |
|
|
|
return ";".equals(term) && isValidEntity(preamble) ? '&' + preamble : "&" + preamble; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private boolean isValidEntity(final String entity) { |
|
|
|
private boolean isValidEntity(final String entity) { |
|
|
|
return inArray(entity, vAllowedEntities); |
|
|
|
return inArray(entity, vAllowedEntities); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private static boolean inArray(final String s, final String[] array) { |
|
|
|
private static boolean inArray(final String s, final String[] array) { |
|
|
|
for (String item : array) { |
|
|
|
for (String item : array) { |
|
|
|
if (item != null && item.equals(s)) { |
|
|
|
if (item != null && item.equals(s)) { |
|
|
|
return true; |
|
|
|
return true; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
return false; |
|
|
|
return false; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private boolean allowed(final String name) { |
|
|
|
private boolean allowed(final String name) { |
|
|
|
return (vAllowed.isEmpty() || vAllowed.containsKey(name)) && !inArray(name, vDisallowed); |
|
|
|
return (vAllowed.isEmpty() || vAllowed.containsKey(name)) && !inArray(name, vDisallowed); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
private boolean allowedAttribute(final String name, final String paramName) { |
|
|
|
private boolean allowedAttribute(final String name, final String paramName) { |
|
|
|
return allowed(name) && (vAllowed.isEmpty() || vAllowed.get(name).contains(paramName)); |
|
|
|
return allowed(name) && (vAllowed.isEmpty() || vAllowed.get(name).contains(paramName)); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|