2009年3月16日星期一

一个简单的HTML解析器

这个解析器还不能很好地工作，但是架子大概已经有了，慢慢完善吧。

import URLTest.URLUtil;

public class HtmlParser {
public String leader;
public String tag;
public String body;
public String end;
public String trailer;

public HtmlParser more;
public HtmlParser parts;

static String tags[]={"html","body","table","tr","td"};

public HtmlParser(String text){
this (text,tags,0,0);
}

public HtmlParser(String text, String[] tags2){
this (text,tags2,0,0);
}

public HtmlParser(String text, String[] tags2, int level, int offset) {
String lc=text.toLowerCase();
int startTag=lc.indexOf("<"+tags[level]);
int endTag=lc.indexOf(">",startTag)+1;
int startEnd=lc.indexOf(" int endEnd=lc.indexOf(">",startEnd)+1;
int startMore=lc.indexOf("<"+tags[level],endEnd);

if(startTag<0||endTag<0||startEnd<0||endEnd<0){
return;
}


leader=text.substring(0,startTag);
tag=text.substring(startTag,endTag);
body=text.substring(endTag,startEnd);
end=text.substring(startEnd,endEnd);
trailer=text.substring(endEnd);
System.out.println("leader is :"+leader);
System.out.println("tag is :"+tag);
System.out.println("body is :"+body);
System.out.println("end is :"+end);
System.out.println("trailer is :"+trailer);

if(level+1 parts=new HtmlParser(body,tags,level+1,offset+endTag);
body=null;
}

if(startMore>=0){
more=new HtmlParser(body,tags,level,offset+endEnd);
trailer=null;
}
}

public static void main(String[] args) {
HtmlParser hp=new HtmlParser(URLUtil.getHtml("http://www.google.com"));
}

}

2 条评论:

goooooood girl 说...

your blog is feel good......

匿名 说...

who are you?