第八课正则表达式 · 万能Web开发文档笔记

>常用正则

~~~
([\s\S]*?) 表示任意多个字符,换行也可以匹配
([\s*]+) 匹配一个或多个空白字符(字符类中的 * 是字面量,星号也会被匹配)
([\s,]+) 匹配多个空格或逗号
([,]+) 匹配多个逗号
/php/i 不区分大小写
^ $ 匹配字符串的开始和结束位置
. 匹配除换行以外的任意单个字符
? 0次或 1次等价{0,1}
* 0次或多次等价{0,}
+ 1次或多次等价{1,}
- 表示范围
[] 开始结束字符类定义
\d 任意10进制数字 [0-9]
\s 单个任意空白字符
\S 任意非空白字符
\w 任意单词字符等价[a-zA-Z0-9_]
(?:中国|美国)(.*) 匹配中国,美国开头的字符串
(\d+\.\d+\.\d+\.\d+) IP
([a-zA-Z][a-zA-Z0-9_]{4,15}) 匹配是否为字母开头的合法帐号(5-16位,允许字母数字下划线)
(\d{3}-\d{8}|\d{4}-\d{7}) 电话号码
[1-9][0-9]{4,} qq
^[\w\.\-]+@\w+([\.\-]\w+)*\.\w+$ email
href="(.*?)" 超链接
/^\d{1,6}$/ 匹配0-999999
/\d{4}年\d{1,2}月\d{1,2}/ 匹配年月日
~~~

>preg_match 只匹配一次,匹配成功返回 1(未匹配返回 0,出错返回 false)

~~~
preg_match("/\<center>([\s\S]*?)<\/center\>/",$str,$rs);
~~~

>preg_match_all 匹配多次,返回完整匹配的次数(可能为 0,出错返回 false)

~~~
preg_match_all("/\<center>([\s\S]*?)<\/center\>/",$str,$rs);
~~~

>preg_replace 匹配替换,替换成$re

~~~
$rs =preg_replace("/\<center>([\s\S]*?)<\/center\>/",$re,$str);
~~~

>preg_split分割成数组

~~~
$arr = preg_split('/([\s*]+)/',"a b c d ef");
~~~

替换

~~~
$str = "选项[http://127.0.0.1/weixin/addons/yoby_diyform/weui/fm.jpg]你好";
$str1 = preg_replace("/(?:\[)(.*?)(?:\])/i", "<img src=\"\${1}\" />", $str);
preg_replace("/.*\|(.*?)\|.*/i", "\${1}", $v);
// 例如 $v 为 "字符|120000|来了" 时输出 120000
~~~

`\s+` 多个空白 `[^>]` 除 > 以外的任意字符(即 > 左边的内容) `.*?` 
任意多个字符 `\d+` 匹配数字

~~~
/*获取html并用正则处理*/
function get_content($url){
    $html = file_get_contents($url);
    $code= mb_detect_encoding($html, array("GB2312","GBK",'UTF-8','BIG5'));//获取编码
    if($code!="UTF-8"){
        $htmls = mb_convert_encoding($html, "UTF-8", $code);//转换内容为UTF-8编码
    }else{
        $htmls = $html;
    }
    $htmls = preg_replace("/<script[\s\S]*?<\/script>/i","",$htmls,-1);//去除script
    $htmls = preg_replace("/<noscript[\s\S]*?<\/noscript>/i","",$htmls,-1);//去除noscript
    $htmls=preg_replace("/<(\/?link.*?)>/si","",$htmls);//去掉link
    $htmls=preg_replace("/<(style.*?)>(.*?)<(\/style.*?)>/si","",$htmls);//去掉style
    $htmls =preg_replace("/style=.+?['|\"]/i",'',$htmls,-1);//去除style行内样式
    $htmls =preg_replace('#<!--.*?-->#s' , '' , $htmls);//去掉html注释
    $htmls = preg_replace("/<a[^>]*>(.*?)<\/a>/is", "$1", $htmls);//去除外站超链接
    $htmls = preg_replace("/(\n\r)/i", '', $htmls); //去掉空行
    return $htmls;
}
preg_match('/<div class="infoBox-list".*?>.*?<div class="news-page clearfix">/ism', $htmls, $rs);
$htmls = $rs[0];//获取两个class之间内容
$url = (preg_match('/^http(s)?:\\/\\/.+/',$url))?$url:"http://".$url;//判断是否包含https/http
preg_match("/src=\"\/?(.*?)\"/",$content,$match);//获取第一张图片
~~~

~~~
[\u4e00-\u9fa5]{0,} 匹配中文(PHP 中需写成 /[\x{4e00}-\x{9fa5}]{0,}/u)
\d+ 匹配非负整数(>=0 的数字)
[a-zA-Z]+ 不区分大小写26个字母
[A-Za-z0-9]+ 英文与数字
\s+ 多个空白字符
[0-9]* 匹配一串数字
\d{4} 匹配四位数字
\d{5,} 匹配至少5位数
\d{4,10} 匹配4-10位数
~~~