`
vtrtbb
  • 浏览: 354192 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

java检测html是否闭合

    博客分类:
  • java
阅读更多
/**
 * A minimal growable list of tag names backed by a {@code String} array.
 *
 * <p>Removal does not shift elements: a removed slot is simply set to
 * {@code null} (a "tombstone"), so {@link #size()} keeps counting removed
 * slots and {@link #get(int)} may return {@code null} for an index that is
 * still in range. Callers iterating the list must skip {@code null} entries.
 */
class TagsList {
    /** Backing storage; only indexes {@code [0, size)} have ever been written. */
    private String[] data;
    /** Number of slots handed out by {@link #add(String)}, including tombstones. */
    private int size = 0;

    /**
     * Creates a list with the given initial capacity.
     *
     * @param size initial capacity of the backing array
     */
    public TagsList(int size) {
        data = new String[size];
    }

    /** Creates a list with an initial capacity of 10. */
    public TagsList() {
        this(10);
    }

    /**
     * Appends a tag name, growing the backing array when it is full.
     *
     * @param str the tag name to append
     */
    public void add(String str) {
        ensureCapacity(size + 1);
        data[size++] = str;
    }

    /**
     * Returns the entry stored at {@code index}.
     *
     * @param index position to read
     * @return the stored entry, or {@code null} when the index is out of range
     *         (negative or {@code >= size()}) or the slot has been removed
     */
    public String get(int index) {
        if (index < 0 || index >= size) {
            return null;
        }
        return data[index];
    }

    /**
     * Removes the first entry equal to {@code str}. For speed the slot is only
     * set to {@code null}; nothing is shifted and {@link #size()} is unchanged.
     *
     * @param str the tag name to remove
     * @return {@code true} if a matching entry was found and cleared;
     *         {@code false} otherwise, including when {@code str} is {@code null}
     */
    public boolean remove(String str) {
        if (str == null) {
            // A null argument must never match a tombstone left by an earlier removal.
            return false;
        }
        for (int index = 0; index < size; index++) {
            if (str.equals(data[index])) {
                data[index] = null;
                return true;
            }
        }
        return false;
    }

    /**
     * Clears the slot at {@code index} by setting it to {@code null}.
     *
     * @param index position to clear
     * @return {@code true} if {@code index} is within {@code [0, size())};
     *         {@code false} otherwise
     */
    public boolean remove(int index) {
        if (index < 0 || index >= size) {
            return false;
        }
        data[index] = null;
        return true;
    }

    /**
     * Returns the number of slots in use, counting removed (null) slots.
     *
     * @return the number of entries ever added
     */
    public int size() {
        return this.size;
    }

    /**
     * Grows the backing array so that it can hold at least {@code minSize}
     * entries. The new capacity is the larger of {@code minSize} and
     * 1.5 times the old capacity plus one.
     *
     * @param minSize the required minimum capacity
     */
    public void ensureCapacity(int minSize) {
        int oldCapacity = data.length;
        if (minSize > oldCapacity) {
            int grown = oldCapacity * 3 / 2 + 1;
            int newCapacity = Math.max(grown, minSize);
            // Fully qualified so the class compiles without a file-level import.
            data = java.util.Arrays.copyOf(data, newCapacity);
        }
    }
}

 

 

/**
 * Checks whether the HTML tags in a piece of text are balanced and offers a
 * simple repair that adds the missing opening and closing tags.
 *
 * <p>The scanner is purely syntactic. Known limitations: tag names are compared
 * case-sensitively, void elements written without a trailing slash (for example
 * {@code <br>}) are reported as unclosed, and the contents of elements such as
 * {@code <script>} are scanned like ordinary markup.
 *
 * <p>Originally written 2007-6.
 *
 * @author Liw
 */
public class TagsChecker {

    /** The two kinds of imbalance found by a scan, each in document order. */
    private static final class UnclosedTags {
        /** Names of closing tags that had no earlier opening tag, e.g. a stray {@code </div>}. */
        final java.util.List<String> missingOpen = new java.util.ArrayList<String>();
        /** Names of opening tags that are never closed, e.g. a {@code <div>} without {@code </div>}. */
        final java.util.List<String> missingClose = new java.util.ArrayList<String>();
    }

    /**
     * Tells whether every tag in {@code str} is balanced.
     *
     * @param str the text to inspect
     * @return {@code true} when no closing tag lacks an opening tag and no
     *         opening tag lacks a closing tag
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static boolean check(String str) {
        UnclosedTags unclosed = getUnclosedTags(str);
        return unclosed.missingOpen.isEmpty() && unclosed.missingClose.isEmpty();
    }

    /**
     * Repairs {@code str} by prepending an opening tag for every unmatched
     * closing tag and appending a closing tag for every unclosed opening tag.
     * Both groups are emitted in reverse document order so that the added tags
     * nest properly around the original text.
     *
     * @param str the text to repair
     * @return the repaired text; equal to {@code str} when it was already balanced
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String fix(String str) {
        UnclosedTags unclosed = getUnclosedTags(str);
        StringBuilder fixed = new StringBuilder(str.length() + 16);

        for (int i = unclosed.missingOpen.size() - 1; i >= 0; i--) {
            fixed.append('<').append(unclosed.missingOpen.get(i)).append('>');
        }

        fixed.append(str);

        for (int i = unclosed.missingClose.size() - 1; i >= 0; i--) {
            fixed.append("</").append(unclosed.missingClose.get(i)).append('>');
        }

        return fixed.toString();
    }

    /**
     * Scans {@code str} once and collects the unmatched tags.
     *
     * <p>Only a {@code <} that is followed by a letter, {@code /}, {@code !} or
     * {@code ?} starts markup; any other {@code <} is ordinary text. Quotes are
     * significant only inside a tag, where they delimit attribute values.
     */
    private static UnclosedTags getUnclosedTags(String str) {
        if (str == null) {
            throw new NullPointerException("str");
        }
        UnclosedTags result = new UnclosedTags();
        int length = str.length();
        int pos = 0;
        while (pos < length) {
            int lt = str.indexOf('<', pos);
            if (lt < 0 || lt + 1 >= length) {
                break; // no more markup, or a dangling '<' as the very last character
            }
            char next = str.charAt(lt + 1);
            if (next == '/') {
                pos = readClosingTag(str, lt + 2, result);
            } else if (next == '!' || next == '?') {
                pos = skipDeclaration(str, lt);
            } else if (Character.isLetter(next)) {
                pos = readOpeningTag(str, lt + 1, result);
            } else {
                pos = lt + 1; // a literal '<' in text, as in "1 < 2"
            }
        }
        return result;
    }

    /**
     * Handles a closing tag whose name starts at {@code start} (just after
     * {@code </}) and returns the index at which scanning should resume.
     */
    private static int readClosingTag(String str, int start, UnclosedTags result) {
        int gt = str.indexOf('>', start);
        if (gt < 0) {
            return str.length(); // unterminated closing tag at end of input: ignore it
        }
        int nameEnd = start;
        while (nameEnd < gt && !Character.isWhitespace(str.charAt(nameEnd))) {
            nameEnd++;
        }
        if (nameEnd > start && Character.isLetter(str.charAt(start))) {
            String name = str.substring(start, nameEnd);
            // Match the most recently opened tag of this name so nesting stays correct.
            int openIndex = result.missingClose.lastIndexOf(name);
            if (openIndex >= 0) {
                result.missingClose.remove(openIndex);
            } else {
                result.missingOpen.add(name);
            }
        }
        return gt + 1;
    }

    /**
     * Skips a comment ({@code <!-- ... -->}) or another {@code <!...>} /
     * {@code <?...>} construct beginning at {@code lt} and returns the index
     * just past it, or the input length when it is unterminated.
     */
    private static int skipDeclaration(String str, int lt) {
        if (str.startsWith("<!--", lt)) {
            // Searching from lt + 2 also accepts the degenerate forms "<!-->" and "<!--->".
            int end = str.indexOf("-->", lt + 2);
            return end < 0 ? str.length() : end + 3;
        }
        int gt = str.indexOf('>', lt + 2);
        return gt < 0 ? str.length() : gt + 1;
    }

    /**
     * Handles an opening tag whose name starts at {@code start} (just after
     * {@code <}) and returns the index at which scanning should resume. The tag
     * is recorded as unclosed unless it is self-closing ({@code <name ... />})
     * or is cut off by the end of the input.
     */
    private static int readOpeningTag(String str, int start, UnclosedTags result) {
        int length = str.length();
        int pos = start;
        while (pos < length) {
            char c = str.charAt(pos);
            if (c == '>' || c == '/' || Character.isWhitespace(c)) {
                break;
            }
            pos++;
        }
        String name = str.substring(start, pos);

        // prev tracks the last character seen before '>' to detect the "/>" ending.
        char prev = str.charAt(pos - 1);
        while (pos < length) {
            char c = str.charAt(pos++);
            if (c == '>') {
                if (prev != '/') {
                    result.missingClose.add(name);
                }
                return pos;
            }
            if (c == '"' || c == '\'') {
                // Skip the quoted attribute value; it may legally contain '<', '>' or '/'.
                int closingQuote = str.indexOf(c, pos);
                if (closingQuote < 0) {
                    return length; // unterminated quote: the tag never ends
                }
                pos = closingQuote + 1;
            }
            prev = c;
        }
        return length; // input ended inside the tag: ignore it
    }
}

 

 

 

public class Test 
...{
    public static void main(String[] args)
    ...{
        System.out.println("--功能测试--");
        String str1 = "tt</u>ss</a>aa<div name="<test>" id='3' other='<test>'><b>sff";
        String str2 = "tt<u>ss</u><div id=test name="<test>"><a>fds</a></div>";
        System.out.println("检查文本 " + str1);
        System.out.println("结果:" + TagsChecker.check(str1));
        System.out.println("检查文本 " + str2);
        System.out.println("结果:" + TagsChecker.check(str2));
        System.out.println("修复文本 " + str1);
        System.out.println("结果:" + TagsChecker.fix(str1));
        
        for (int i = 0; i < 10; i++) ...{
            str1 += str1;
        }
        
        System.out.println();
        System.out.println("--效率测试--");
        System.out.println("文本长度:" + str1.length());
        long t1 = System.currentTimeMillis();
        boolean closed = TagsChecker.check(str1);
        long t2 = System.currentTimeMillis();
        String fixedStr = TagsChecker.fix(str1);
        long t3 = System.currentTimeMillis(); 
        System.out.println("检查用时:" + (t2 - t1) + " 毫秒 结果:" + closed);
        System.out.println("修复用时:" + (t3 - t2) + " 毫秒");
    }

}

 

 

 

下面是在我机器上的测试结果:

配置:CPU P4-M1.8GHz   内存 768M

--功能测试--
检查文本 tt</u>ss</a>aa<div name="<test>" id='3' other='<test>'><b>sff
结果:false
检查文本 tt<u>ss</u><div id=test name="<test>"><a>fds</a></div>
结果:true
修复文本 tt</u>ss</a>aa<div name="<test>" id='3' other='<test>'><b>sff
结果:<a><u>tt</u>ss</a>aa<div name="<test>" id='3' other='<test>'><b>sff</b></div>

--效率测试--
文本长度:62464
检查用时:101 毫秒 结果:false
修复用时:110 毫秒

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics