|
|
发表于 2013-2-22 11:18:16
|
显示全部楼层
本帖最后由 demo 于 2013-2-23 03:21 编辑
Some code you might need when getting data from a web page:- <?php
// Demo: download a page, then print it after several different clean-up passes.
$url = "http://www.google.com.au/";

// Opening a URL with fopen() needs allow_url_fopen and returns false on failure.
// Looping on a failed handle either spins forever or raises a TypeError,
// depending on the PHP version, so bail out early.
$page = fopen($url, 'r');
if ($page === false) {
    throw new RuntimeException("Unable to open {$url}");
}

// Read whole lines (no length cap, so a long line is never trimmed mid-way),
// drop surrounding whitespace and blank lines, then join with one space so the
// page becomes a single line without gluing adjacent words together.
$lines = array();
while (($line = fgets($page)) !== false) {
    $line = trim($line);
    if ($line !== '') {
        $lines[] = $line;
    }
}
fclose($page);
$content = implode(' ', $lines);

echo $content;

echo "\r\n==========Remove script============\r\n";
// Script and style blocks are replaced by visible markers ('|' and ':').
// [^>]* lets the opening tag carry attributes such as type="text/css".
$withoutScripts = preg_replace('%<script\b[^>]*>.*?</script>%is', '|', $content);
$withoutStyles = preg_replace('%<style\b[^>]*>.*?</style>%is', ':', $withoutScripts);
echo $withoutStyles;

echo "\r\n==========Use strip_tags to remove html tags with some exceptions============\r\n";
// The second argument lists the tags strip_tags() must keep.
echo strip_tags($content, '<a><head>');

echo "\r\n==========Text only============\r\n";
// Removes anything shaped like an opening or closing HTML tag, leaving the
// text that sits between tags.
$html_reg = '/<+\s*\/*\s*([A-Z][A-Z0-9]*)\b[^>]*\/*\s*>+/i';
echo preg_replace($html_reg, '', $content);

echo "\r\n==========Get text============\r\n";
// The same array-of-patterns technique also converts BBCode to HTML, e.g.
//   '/\[url\](.*?)\[\/url\]/'  with replacement  '<a href="\\1">\\1</a>'
//   '/\[img\](.*?)\[\/img\]/'  with replacement  '<img src="\\1">'
//   '/\[b\](.*?)\[\/b\]/'      with replacement  '<b>\\1</b>'

// Patterns are single-quoted because they contain literal double quotes.
$patterns = array(
    '/<a href="(.*?)"(.*?)>(.*?)<\/a>/',
    '/<img src="(.*?)"(.*?)\/>/',
);
// \\N is the N-th capture group: link text (\\3) and href/src (\\1).
$replacements = array(
    "\r\n\\3 -->http://www.demo.com\\1\r\n",
    "\r\n Img --> http://www.demo.com\\1\r\n",
);
// Runs on the script/style-free markup so inline code is not listed as text.
$result = preg_replace($patterns, $replacements, $withoutStyles);
echo $result;
?>
复制代码 More flexible code to extract links or pictures from one section of a page:- <?php
// Demo: cut one section out of a page and list the links and images inside it.
$url = "http://news.msn.com/";

// fopen() on a URL needs allow_url_fopen and returns false on failure; check it
// before reading so a failed request does not turn into an endless loop or a
// TypeError.
$page = fopen($url, 'r');
if ($page === false) {
    throw new RuntimeException("Unable to open {$url}");
}

// Read whole lines, trim them, skip blank ones and join with a single space so
// the page becomes one line without merging words across line breaks.
$lines = array();
while (($line = fgets($page)) !== false) {
    $line = trim($line);
    if ($line !== '') {
        $lines[] = $line;
    }
}
fclose($page);
$content = implode(' ', $lines);

// Markers delimiting the section of interest. Single-quoted because the markup
// itself contains double quotes.
$start = '<section id="featured_classic fill > cluster" class="featured " data-aop="featured_classic fill > cluster">';
$end = '<section id="clusters">';

// preg_quote() keeps the markers literal inside the pattern. (.*) is greedy, so
// the capture runs up to the last occurrence of $end.
$sectionPattern = '/' . preg_quote($start, '/') . '(.*)' . preg_quote($end, '/') . '/s';
if (preg_match($sectionPattern, $content, $match) !== 1) {
    throw new RuntimeException("Section markers not found in {$url}");
}
$mytext = $match[1];
echo "$mytext\r\nData List \r\n";

// Patterns are single-quoted because they contain literal double quotes.
$patterns = array(
    '/<a href="(.*?)"(.*?)>(.*?)<\/a>/',
    '/<img(.*?)src="(.*?)"(.*?)\/>/',
);
// \\N is the N-th capture group: link text (\\3) with its href (\\1), and the
// image src (\\2, since the first group holds attributes preceding src).
$replacements = array(
    "\r\n...\\3 -->http://www.demo.com\\1\r\n",
    "\r\n...Img --> http://www.demo.com\\2\r\n",
);
$result = preg_replace($patterns, $replacements, $mytext);
echo $result;
?>
复制代码 And this will give you the HTTP response header information:- <?php
// Demo: two ways of inspecting the HTTP response headers of a URL stream.
$fp = fopen('http://www.google.com', 'r');
// fopen() returns false on failure; stream_get_meta_data() needs a real stream.
if ($fp === false) {
    throw new RuntimeException('Unable to open http://www.google.com');
}
// Opening an HTTP URL creates the variable $http_response_header in this scope
print_r($http_response_header);
// or
$meta_data = stream_get_meta_data($fp);
print_r($meta_data);
fclose($fp);
?>
复制代码 Pretty cool.
|
|