サイト収集スクリプトfunction.php
ザウルスNetFrontでも動作するように修正。function.phpを直接開いた際にフォームを表示するようにした。PATH項目が登録されないバグも回避できないか、試しにSLEEPを入れてみた。効果のほどは確認していない。
<?php
/**
 * Site collection endpoint.
 *
 * Mirrors web sites below SAVE_DIR with wget, keeps one row per mirrored site
 * in the `site_list` table, and can zip or delete a mirror again.
 *
 * Request parameters (all via GET):
 *   mode=select [&update_check=1]      XML list of every registered site
 *   mode=wget   &url=..&title=..&comment=..   start a mirror and register it
 *   mode=rmdir  &url=..                delete the mirror and its row
 *   mode=title  &url=..                print the <title> of the remote page
 *   mode=zip    &url=..                zip the mirror, print the archive name
 *   ajax=false                         additionally render the plain HTML form
 * Without a url parameter the plain HTML form is rendered.
 *
 * The code deliberately sticks to syntax that is valid from PHP 5.4 up to
 * PHP 8.x (no type declarations, long array syntax) because the surrounding
 * application is of that vintage.
 */

// Directory, relative to this script, below which wget stores the mirrors.
define("SAVE_DIR", "./data");
// NOTE(review): the four values below are placeholders (the original passed
// the bare words DB_HOST etc., which old PHP evaluated to these very strings).
// Replace them with the real connection settings.
define("DB_HOST", "DB_HOST");
define("DB_USER", "DB_USER");
define("DB_PASS", "DB_PASS");
define("DB_NAME", "DB_NAME");
define("WGET_CMD", "/usr/local/bin/wget");
define("RM_CMD", "/bin/rm");
define("ZIP_CMD", "/usr/local/bin/zip");
define("LS_CMD", "/bin/ls");

$mode = requestParam("mode", "");
if ($mode == "select") {
    select(requestParam("update_check", "0"));
    return;
}

// The HTML form sends ajax=false; Ajax callers omit the flag or send "true".
$ajaxFlag = requestParam("ajax");
$isAjax = ($ajaxFlag === null || $ajaxFlag == "true");

$rawUrl = requestParam("url");
if ($rawUrl === null) {
    nonAjax();
} else {
    // Callers send the URL encoded once more on top of the normal query
    // string encoding, hence the explicit urldecode().
    $src = urldecode($rawUrl);
    switch ($mode) {
        case "wget":
            wget($src);
            $title = mb_convert_encoding(requestParam("title", ""), 'UTF-8', 'auto');
            $comment = mb_convert_encoding(requestParam("comment", ""), 'UTF-8', 'auto');
            // wget runs in the background; the pause is an (unverified) attempt
            // to let it create the target directory before dbput() resolves it.
            sleep(1);
            dbput($src, $title, $comment);
            if (!$isAjax) {
                nonAjax();
            }
            break;
        case "rmdir":
            dbdel($src);
            if (!$isAjax) {
                nonAjax();
            }
            break;
        case "title":
            echo getTitle($src);
            break;
        case "zip":
            $archive = zip($src);
            if ($isAjax) {
                echo $archive;
            } else {
                if ($archive !== "") {
                    header("Location: " . $archive);
                }
                nonAjax();
            }
            break;
    }
}

/**
 * Returns a GET parameter if it is present as a plain string.
 *
 * @internal
 * @param string      $name    parameter name
 * @param string|null $default value returned when the parameter is missing
 *                             or is not a string (e.g. "url[]=x")
 * @return string|null
 */
function requestParam($name, $default = null)
{
    if (isset($_GET[$name]) && is_string($_GET[$name])) {
        return $_GET[$name];
    }
    return $default;
}

/**
 * Tells whether $url starts with the literal, case-sensitive prefix "http://".
 *
 * @internal
 * @param mixed $url
 * @return bool
 */
function isHttpUrl($url)
{
    return is_string($url) && strncmp($url, "http://", 7) === 0;
}

/**
 * parse_url() wrapper that guarantees a non-empty 'host' and a 'path' key.
 *
 * @internal
 * @param string $url
 * @return array|false the parsed components, or false if no host is present
 */
function urlParts($url)
{
    $parts = parse_url($url);
    if (!is_array($parts) || !isset($parts['host']) || $parts['host'] === '') {
        return false;
    }
    if (!isset($parts['path'])) {
        $parts['path'] = '';
    }
    return $parts;
}

/**
 * Maps a mirrored URL to the existing local directory that holds it.
 *
 * The result is always a real directory strictly below SAVE_DIR, so it is
 * safe to hand to rm/zip. If the URL points at a file, the directory that
 * contains the file is returned.
 *
 * @internal
 * @param string $url
 * @return string|false absolute directory, or false if it does not exist
 *                      (yet) or would lie outside SAVE_DIR
 */
function resolveSaveDir($url)
{
    $parts = urlParts($url);
    if ($parts === false) {
        return false;
    }
    $root = realpath(SAVE_DIR);
    // "~" is looked up as "%7E", presumably because that is how wget names
    // the local files.
    $candidate = str_replace("~", "%7E", SAVE_DIR . "/" . $parts['host'] . "/" . $parts['path']);
    $real = realpath($candidate);
    if ($root === false || $real === false) {
        return false;
    }
    if (!is_dir($real)) {
        $real = dirname($real);
    }
    // Reject SAVE_DIR itself and anything that ".." moved outside of it.
    $prefix = $root . "/";
    if ($real === $root || strncmp($real . "/", $prefix, strlen($prefix)) !== 0) {
        return false;
    }
    return $real;
}

/**
 * Opens the database connection; prints the error and returns false on failure.
 *
 * @internal
 * @return mysqli|false
 */
function dbConnect()
{
    if (!function_exists('mysqli_connect')) {
        echo "mysqli extension is not available";
        return false;
    }
    // Report failures through return values on every PHP version
    // (PHP 8.1+ would otherwise throw exceptions).
    mysqli_report(MYSQLI_REPORT_OFF);
    $con = mysqli_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME);
    if (!$con) {
        echo mysqli_connect_error();
        return false;
    }
    return $con;
}

/**
 * Prints the pending database error, releases the handles, returns false.
 *
 * The error is read BEFORE the handles are closed; afterwards it is gone.
 *
 * @internal
 * @param mysqli           $con
 * @param mysqli_stmt|null $stmt statement whose error should be reported
 * @return bool always false, so callers can write "return dbFail(...)"
 */
function dbFail($con, $stmt = null)
{
    if ($stmt) {
        echo mysqli_stmt_error($stmt);
        mysqli_stmt_close($stmt);
    } else {
        echo mysqli_error($con);
    }
    mysqli_close($con);
    return false;
}

/**
 * Escapes a value for use in HTML/XML text or a quoted attribute.
 *
 * @internal
 * @param mixed $value
 * @return string
 */
function escapeText($value)
{
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Renders the plain HTML form (wget / zip / delete) for clients without Ajax.
 *
 * Prints the database error instead if the site list cannot be read.
 *
 * @return void
 */
function nonAjax()
{
    $con = dbConnect();
    if (!$con) {
        return;
    }
    $rs = mysqli_query($con, " select `URL`,`TITLE` from site_list order by `URL`");
    if (!$rs) {
        dbFail($con);
        return;
    }
    $option = "";
    while ($item = mysqli_fetch_assoc($rs)) {
        // Stored values originate from user input and remote pages: escape them.
        $option .= "<option value='" . escapeText($item['URL']) . "'>"
                 . escapeText($item['TITLE']) . "</option>";
    }
    mysqli_free_result($rs);
    mysqli_close($con);

    print <<<EOF
<html>
<head>
<title>wget</title>
</head>
<body>
<form method="GET" action="function.php">
<input type="hidden" name="ajax" value="false">
<input type="hidden" name="mode" value="wget">
<table border="0">
<tr><th>URL</th><td><input id="url" type="text" size="60" name="url" value="http://"> </td></tr>
<tr><th>タイトル</th><td><input id="title" type="text" size="60" name="title" value=""> </td></tr>
<tr><th>コメント</th><td><input id="comment" type="text" size="60" name="comment" value=""> <input type="submit" value="wget実行"> </td></tr>
</table>
</form>
<form method="GET" action="function.php">
<input type="hidden" name="ajax" value="false">
<input type="hidden" name="mode" value="zip">
<select id="url" name="url">{$option}</select>
<input type="submit" value="圧縮">
</form>
<form method="GET" action="function.php">
<input type="hidden" name="ajax" value="false">
<input type="hidden" name="mode" value="rmdir">
<select id="url" name="url">{$option}</select>
<input type="submit" value="削除">
</form>
<a href=".">back</a>
</body>
</html>
EOF;
}

/**
 * Starts a recursive background wget of $url into SAVE_DIR.
 *
 * @param string $url must start with "http://"
 * @return array|false output lines of the wget launcher, or false if the URL
 *                     is not an http:// URL with a host
 */
function wget($url)
{
    if (!isHttpUrl($url)) {
        return false;
    }
    $parts = urlParts($url);
    if ($parts === false) {
        return false;
    }
    // Every request-derived value is quoted; the URL used to be pasted into
    // the shell command verbatim.
    $cmd = WGET_CMD . " -b -r -k -np -q -t 3 -S -N -a wget.log"
         . " -P " . escapeshellarg(SAVE_DIR)
         . " -D" . escapeshellarg($parts['host'])
         . " " . escapeshellarg($url);
    $output = array();
    exec($cmd, $output, $status);
    return $output;
}

/**
 * Inserts or refreshes the `site_list` row of a mirrored URL.
 *
 * A row is considered the same site if its URL matches or if it has the same
 * non-empty PATH. When the local directory cannot be resolved (typically
 * because the background wget has not created it yet) PATH is stored empty.
 *
 * @param string $url     must start with "http://"
 * @param string $title   UTF-8 title
 * @param string $comment UTF-8 comment
 * @return bool true on success; false on an invalid URL or database error
 *              (the error text is printed)
 */
function dbput($url, $title = "", $comment = "")
{
    if (!isHttpUrl($url)) {
        return false;
    }
    $con = dbConnect();
    if (!$con) {
        return false;
    }
    $dir = resolveSaveDir($url);
    $path = ($dir === false) ? "" : $dir;

    // An empty PATH must never match: it would hit every not-yet-resolved row.
    $match = " where `URL` = ? or (`PATH` = ? and `PATH` <> '')";

    $stmt = mysqli_prepare($con, "select count(*) from `site_list`" . $match);
    if (!$stmt) {
        return dbFail($con);
    }
    mysqli_stmt_bind_param($stmt, "ss", $url, $path);
    if (!mysqli_stmt_execute($stmt)) {
        return dbFail($con, $stmt);
    }
    $count = 0;
    mysqli_stmt_bind_result($stmt, $count);
    mysqli_stmt_fetch($stmt);
    mysqli_stmt_close($stmt);

    if ((int) $count === 0) {
        $stmt = mysqli_prepare($con, "insert into `site_list` values(?, ?, ?, ?, NOW())");
        if (!$stmt) {
            return dbFail($con);
        }
        mysqli_stmt_bind_param($stmt, "ssss", $url, $path, $title, $comment);
    } else {
        $stmt = mysqli_prepare(
            $con,
            "update `site_list` set `URL` = ?, `PATH` = ?, `TITLE` = ?, `COMMENT` = ?, `UPDATE` = NOW()" . $match
        );
        if (!$stmt) {
            return dbFail($con);
        }
        mysqli_stmt_bind_param($stmt, "ssssss", $url, $path, $title, $comment, $url, $path);
    }
    if (!mysqli_stmt_execute($stmt)) {
        return dbFail($con, $stmt);
    }
    mysqli_stmt_close($stmt);
    mysqli_close($con);
    return true;
}

/**
 * Deletes the local mirror of $url and its `site_list` row.
 *
 * Files are only removed when the mirror resolves to an existing directory
 * strictly below SAVE_DIR; the row is removed either way.
 *
 * @param string $url must start with "http://"
 * @return bool true on success; false on an invalid URL or database error
 *              (the error text is printed)
 */
function dbdel($url)
{
    if (!isHttpUrl($url)) {
        return false;
    }
    $con = dbConnect();
    if (!$con) {
        return false;
    }
    $dir = resolveSaveDir($url);
    if ($dir !== false) {
        exec(RM_CMD . " -rf " . escapeshellarg($dir), $rmOutput, $rmStatus);
    }
    $path = ($dir === false) ? "" : $dir;

    $stmt = mysqli_prepare(
        $con,
        "delete from `site_list` where `URL` = ? or (`PATH` = ? and `PATH` <> '')"
    );
    if (!$stmt) {
        return dbFail($con);
    }
    mysqli_stmt_bind_param($stmt, "ss", $url, $path);
    if (!mysqli_stmt_execute($stmt)) {
        return dbFail($con, $stmt);
    }
    mysqli_stmt_close($stmt);
    mysqli_close($con);
    return true;
}

/**
 * Fetches $url and returns the text of its <title> element as UTF-8.
 *
 * NOTE(review): this downloads whatever http:// address the caller names,
 * including hosts only reachable from the server. Restrict access to this
 * endpoint accordingly.
 *
 * @param string $url must start with "http://"
 * @return string the title, or "" if the URL is not http://, the download
 *                fails, or no non-empty title exists
 */
function getTitle($url)
{
    if (!isHttpUrl($url)) {
        return "";
    }
    $text = file_get_contents($url);
    if ($text === false) {
        return "";
    }
    // Non-greedy so that a second "title" element further down the page
    // (e.g. inside inline SVG) is not swallowed.
    if (preg_match('/<title(?:\s[^>]*)?>(.+?)<\/title>/is', $text, $found)) {
        return mb_convert_encoding($found[1], 'UTF-8', 'auto');
    }
    return "";
}

/**
 * Zips the local mirror of $url into "<directory name>.zip" next to this script.
 *
 * All *.zip files in the current directory are removed first. The archive is
 * written to "../<name>.zip" relative to SAVE_DIR, so SAVE_DIR is expected to
 * be a direct child of the script directory.
 *
 * @param string $url
 * @return string archive file name, or "" if the mirror does not exist or zip
 *                exits with a non-zero status
 */
function zip($url)
{
    $dir = resolveSaveDir($url);
    if ($dir === false) {
        return "";
    }
    $file = basename($dir) . ".zip";
    $relative = toRelative(SAVE_DIR, $dir);

    // The glob is intentionally left unquoted so the shell expands it.
    exec(RM_CMD . " -f *.zip", $rmOutput, $rmStatus);

    $cmd = "cd " . escapeshellarg(SAVE_DIR) . " && " . ZIP_CMD . " -9 -r "
         . escapeshellarg("../" . $file) . " " . escapeshellarg($relative);
    exec($cmd, $zipOutput, $zipStatus);

    // Judge by the exit status: captured output is not a success indicator.
    return ($zipStatus === 0) ? $file : "";
}

/**
 * Expresses $target as a path relative to the directory $base.
 *
 * Both paths are resolved with realpath() first. If $base is a file, its
 * directory is used.
 *
 * @param string $base   existing directory (or file) to start from
 * @param string $target existing path to reach
 * @return string relative path starting with "./" or "../"; "./" when both
 *                resolve to the same directory; "" when either path cannot
 *                be resolved
 */
function toRelative($base, $target)
{
    if (is_file($base)) {
        $base = dirname($base);
    }
    $realBase = realpath($base);
    $realTarget = realpath($target);
    if ($realBase === false || $realTarget === false) {
        return "";
    }
    $baseParts = preg_split('#/#', $realBase, -1, PREG_SPLIT_NO_EMPTY);
    $targetParts = preg_split('#/#', $realTarget, -1, PREG_SPLIT_NO_EMPTY);

    // Strip the common prefix. The count() guards end the loop when one path
    // is exhausted; comparing two missing elements used to spin forever.
    while (count($baseParts) > 0 && count($targetParts) > 0 && $baseParts[0] === $targetParts[0]) {
        array_shift($baseParts);
        array_shift($targetParts);
    }

    $path = str_repeat("../", count($baseParts));
    if ($path === "") {
        $path = "./";
    }
    return $path . implode("/", $targetParts);
}

/**
 * Lists the sub-directories of $path as reported by "ls -F".
 *
 * $path is passed to the shell as one literal argument (no glob expansion).
 *
 * @param string $path directory to list; "" lists the current directory
 * @return array directory names, each with a trailing "/"; array("") when
 *               there is none (kept for backward compatibility)
 */
function lsdir($path)
{
    $argument = ((string) $path === "") ? "" : escapeshellarg($path);
    $lines = array();
    exec(LS_CMD . " -F " . $argument, $lines, $status);

    $dirs = array();
    foreach ($lines as $line) {
        if (substr($line, -1) === "/") {
            $dirs[] = $line;
        }
    }
    // Join-then-split reproduces the historical result shape exactly.
    return explode(",", implode(",", $dirs));
}

/**
 * Prints every registered site as an XML document (<sites><site .../></sites>).
 *
 * @param string $update_check "1" adds an updated='true|false' attribute that
 *                             compares the stored timestamp with the remote
 *                             Last-Modified header (one HTTP request per site)
 * @return void
 */
function select($update_check = "0")
{
    $con = dbConnect();
    if (!$con) {
        return;
    }
    $sql = " select *, UNIX_TIMESTAMP(`UPDATE`) AS UNIX_TIME from site_list order by URL";
    $rs = mysqli_query($con, $sql);
    if (!$rs) {
        dbFail($con);
        return;
    }

    header("Content-type: text/xml; charset=UTF-8");
    print "<?xml version='1.0' encoding='UTF-8' ?>\n";
    print "<sites>\n";
    while ($item = mysqli_fetch_assoc($rs)) {
        $url = (string) $item['URL'];
        // "%7E" is escaped once more so it survives one round of URL decoding.
        $path = str_replace("%7E", "%257E", toRelative(".", (string) $item['PATH']));

        // All values are escaped: an apostrophe or "&" in a title used to
        // produce a document that no XML parser accepts.
        $line = "<site"
              . " url='" . escapeText(urldecode($url)) . "'"
              . " title='" . escapeText($item['TITLE']) . "'"
              . " comment='" . escapeText($item['COMMENT']) . "'"
              . " path='" . escapeText($path) . "'"
              . " update='" . escapeText($item['UPDATE']) . "'";
        if ($update_check == "1") {
            $isNewer = ((int) $item['UNIX_TIME'] < getLastModified($url));
            $line .= $isNewer ? " updated='true'" : " updated='false'";
        }
        print $line . "/>\n";
    }
    print "</sites>\n";
    mysqli_free_result($rs);
    mysqli_close($con);
}

/**
 * Asks the web server for the Last-Modified time of $url (HTTP HEAD request).
 *
 * @param string $url must start with "http://"
 * @return int Unix timestamp, or -1 if the URL is invalid, the server cannot
 *             be reached, or it sends no parsable Last-Modified header
 */
function getLastModified($url)
{
    if (!isHttpUrl($url)) {
        return -1;
    }
    $parts = urlParts($url);
    if ($parts === false || preg_match('/\s/', $parts['host'])) {
        return -1;
    }
    $port = isset($parts['port']) ? (int) $parts['port'] : 80;

    // Request the document itself, not "/": build path plus query string and
    // keep control characters and blanks out of the request line.
    $target = ($parts['path'] === '') ? "/" : $parts['path'];
    if (isset($parts['query'])) {
        $target .= "?" . $parts['query'];
    }
    $target = str_replace(" ", "%20", preg_replace('/[\x00-\x1f\x7f]/', '', $target));

    // Connect to the host only; host.path is not a valid socket address.
    $fp = fsockopen($parts['host'], $port, $errno, $errstr, 30);
    if (!$fp) {
        return -1;
    }
    stream_set_timeout($fp, 30);

    $hostHeader = $parts['host'] . (($port !== 80) ? ":" . $port : "");
    $request = "HEAD " . $target . " HTTP/1.1\r\n"
             . "Host: " . $hostHeader . "\r\n"
             . "Connection: Close\r\n\r\n";
    fwrite($fp, $request);

    $modified = -1;
    while (!feof($fp)) {
        $line = fgets($fp, 1024);
        if ($line === false) {
            break;
        }
        $line = rtrim($line, "\r\n");
        if ($line === "") {
            // Blank line: end of the response headers.
            break;
        }
        if (preg_match('/^Last-Modified:\s*(.+)$/i', $line, $found)) {
            $stamp = strtotime($found[1]);
            if ($stamp !== false) {
                $modified = $stamp;
            }
            break;
        }
    }
    fclose($fp);
    return $modified;
}
?>