整体思路是：先用 cURL 函数抓取整个页面的内容，再用正则表达式对数据进行提取和清洗。
<?php
header("Content-Type: text/html;charset=utf-8");

/**
 * Fetch a URL with cURL and return the raw response body.
 *
 * @author chunlai
 * @date 2018-08-16
 * @anotherdate 2018-08-16T17:17:37+080
 * @Email kk@yaochunlai.xin
 * (author's note: rest assured, I am not a good person)
 *
 * @param string  $url        URL to fetch
 * @param boolean $proxy_flag whether to route the request through a proxy
 * @param array   $proxy      proxy settings: 'host' (required when $proxy_flag is true),
 *                            optional 'port', 'name' (user) and 'pass'
 * @return string|false response body, or false when the request fails or the
 *                      proxy configuration is unusable
 */
function setCurl($url, $proxy_flag = false, $proxy = array())
{
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // 0 sets no explicit connect timeout here; the 120s CURLOPT_TIMEOUT below
    // still bounds the whole transfer.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 0);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_TIMEOUT, 120);

    if ('https' === substr($url, 0, 5)) {
        // NOTE(review): certificate and host verification are switched off for
        // HTTPS, which allows man-in-the-middle tampering. Kept as in the
        // original scraper; enable both checks for anything sensitive.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    }

    if ($proxy_flag) {
        if (!isset($proxy['host']) || $proxy['host'] === '') {
            // A proxy was requested but none is configured: fail instead of
            // silently connecting directly.
            curl_close($ch);
            return false;
        }

        curl_setopt($ch, CURLOPT_PROXY, $proxy['host']);

        if (isset($proxy['port']) && $proxy['port'] !== '') {
            curl_setopt($ch, CURLOPT_PROXYPORT, (int) $proxy['port']);
        }

        // Credentials go through the dedicated option rather than being glued
        // into the host string, so special characters cannot break the proxy
        // address and missing keys no longer raise notices.
        if (isset($proxy['name']) && $proxy['name'] !== '') {
            $proxy_pass = isset($proxy['pass']) ? $proxy['pass'] : '';
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxy['name'] . ':' . $proxy_pass);
        }
    }

    $result = curl_exec($ch);
    curl_close($ch);

    return $result;
}

/**
 * Extract basic film information from (whitespace-stripped) Baidu Baike HTML.
 *
 * Each field is captured from its label up to the next closing </dd> tag and
 * then stripped of markup. A field whose pattern does not match becomes an
 * empty string. The result is dumped with var_dump() and also returned.
 *
 * @param string $baike_info HTML of the Baike page
 * @return array map with the keys 'chupin', 'director' and 'actor'
 */
function handleBaikeInfo($baike_info)
{
    $patterns = array(
        'chupin'   => '/出品公司(.*)\<\/dd\>/U',
        'director' => '/导演(.*)\<\/dd\>/U',
        'actor'    => '/主演\<\/dt\>(.*)\<\/dd\>/U',
    );

    $basic_info = array();
    foreach ($patterns as $key => $pattern) {
        // Only read the capture group when the pattern actually matched.
        $matched = preg_match($pattern, (string) $baike_info, $match) === 1;
        $basic_info[$key] = $matched ? strip_tags($match[1]) : '';
    }

    var_dump($basic_info);

    return $basic_info;
}

/**
 * Remove every whitespace character from a string.
 *
 * Strips ASCII space, tab, LF, CR, the ideographic space (U+3000) and the
 * non-breaking space (U+00A0). The characters are written as byte escapes so
 * that invisible characters cannot be lost when the source is copied, and a
 * plain byte replacement is used so input that is not valid UTF-8 is still
 * returned (a /u regex would yield null for such input).
 *
 * @param string $str input text
 * @return string the text without whitespace
 */
function trimAll($str)
{
    $whitespace = array(
        ' ',
        "\t",
        "\n",
        "\r",
        "\xE3\x80\x80", // U+3000 ideographic (full-width) space
        "\xC2\xA0",     // U+00A0 non-breaking space
    );

    return str_replace($whitespace, '', (string) $str);
}

$url = 'https://baike.baidu.com/item/%E6%88%98%E7%8B%BC%E2%85%A1/20794668?fr=aladdin&fromid=17196087&fromtitle=%E6%88%98%E7%8B%BC2';
$html = setCurl($url);

if ($html === false) {
    // The request failed; there is nothing to parse.
    echo 'curl执行出错';
} else {
    // handleBaikeInfo() prints the extracted fields itself via var_dump().
    $info = handleBaikeInfo(trimAll($html));
}
[title]CURL访问接口API[/title]
什么是API?
类似于一个远程可访问的函数
有接收值（参数）、有返回值
应用
第三方API应用
跨语言通信
/**
 * Call an HTTP API endpoint with cURL and return the raw response body.
 *
 * GET requests carry $params in the query string; POST requests send $params
 * as the request body. Any other method is rejected. On failure the message
 * 'curl执行出错' is echoed and false is returned.
 *
 * @param string $url    endpoint URL
 * @param array  $params request parameters
 * @param string $method 'GET' or 'POST' (case-insensitive)
 * @param array  $header extra HTTP header lines, e.g. array('Accept: application/json')
 * @return string|false response body, or false on failure
 */
function api($url, $params = array(), $method = 'GET', $header = array())
{
    $opts = array(
        CURLOPT_TIMEOUT        => 30,
        CURLOPT_RETURNTRANSFER => true,
        // NOTE(review): TLS host and peer verification are disabled, which
        // allows man-in-the-middle tampering. Kept as in the original; enable
        // both for production endpoints.
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_HTTPHEADER     => $header,
        CURLOPT_HEADER         => false,
    );

    switch (strtoupper($method)) {
        case 'GET':
            $query = http_build_query($params);
            if ($query !== '') {
                // Pick the separator from what the URL already contains, so an
                // existing query string is extended rather than corrupted by a
                // second '?'.
                $last_char = substr($url, -1);
                if ($last_char === '?' || $last_char === '&') {
                    $separator = '';
                } elseif (strpos($url, '?') !== false) {
                    $separator = '&';
                } else {
                    $separator = '?';
                }
                $url .= $separator . $query;
            }
            $opts[CURLOPT_URL] = $url;
            break;

        case 'POST':
            $opts[CURLOPT_URL] = $url;
            $opts[CURLOPT_POST] = true;
            $opts[CURLOPT_POSTFIELDS] = $params;
            break;

        default:
            // Unsupported method: no request can be built. Report it the same
            // way as any other cURL failure.
            echo 'curl执行出错';
            return false;
    }

    $ch = curl_init();
    curl_setopt_array($ch, $opts);
    $result = curl_exec($ch);
    $error = curl_error($ch);
    curl_close($ch);

    if ($error !== '') {
        echo 'curl执行出错';
    }

    return $result;
}

$url = 'http://localhost/get_member_name.php';
$params = array('id' => 2);
$ret = api($url, $params);
echo $ret;