ThinkChat2.0新版上线,更智能更精彩,支持会话、画图、视频、阅读、搜索等,送10W Token,即刻开启你的AI之旅 广告
<?php
/**
 * Finds ".oss" links in the PHP and HTML files of a directory tree.
 *
 * The tree is walked iteratively, matching files are scanned in batches for
 * http(s) URLs (plain or with JSON-escaped slashes), URLs containing ".oss"
 * (and not ".apk") are normalised, and the file/link pairs are written to a
 * text report.
 */

// Raise the limits for large trees: every match is kept in memory until the
// report is written at the end of the run.
// NOTE(review): the original comment mentioned 256M, but the value set is 5102M.
ini_set('memory_limit', '5102M');
ini_set('max_execution_time', '12000');

// Run configuration.
$config = [
    'target_directory' => '../xuannao/metalive/huisheng_ngd', // directory to search
    'output_file'      => 'ephpurl_ngd.txt',                  // report file name
    'ignore_dirs'      => ['vendor', 'node_modules', 'ThinkPHP', 'Plugins'], // directory names to skip
    'file_extensions'  => ['php', 'html', 'htm'],             // extensions to scan (lower case)
    'batch_size'       => 100,                                // files handed to processFileBatch() at once
    'show_progress'    => true,                               // print a progress line after each batch
];

/**
 * Entry point: walks the target directory, collects links and writes the report.
 *
 * Uses an explicit stack instead of recursion. Directories rejected by
 * shouldIgnoreDir() are not descended into. Unreadable directories are logged
 * with error_log() and skipped.
 *
 * @param array $config Keys read here: target_directory, ignore_dirs,
 *                      file_extensions, batch_size, show_progress, output_file.
 * @return void
 */
function findOssLinksOptimized($config)
{
    $fileQueue = [];
    $ossLinks = [];
    $processedFiles = 0;
    $startTime = microtime(true);

    $dirStack = [$config['target_directory']];

    while (!empty($dirStack)) {
        $currentDir = array_pop($dirStack);

        if (shouldIgnoreDir($currentDir, $config['ignore_dirs'])) {
            continue;
        }

        // scandir() signals failure by returning false (plus a warning), not by
        // throwing, so the result has to be checked explicitly.
        $entries = is_dir($currentDir) ? scandir($currentDir) : false;
        if ($entries === false) {
            error_log("Error processing directory {$currentDir}: unable to read directory");
            continue;
        }

        foreach ($entries as $entry) {
            if ($entry === '.' || $entry === '..') {
                continue;
            }

            $path = $currentDir . DIRECTORY_SEPARATOR . $entry;

            if (is_dir($path)) {
                $dirStack[] = $path;
                continue;
            }

            $ext = strtolower(pathinfo($path, PATHINFO_EXTENSION));
            if (!in_array($ext, $config['file_extensions'], true)) {
                continue;
            }

            $fileQueue[] = $path;

            // Flush the queue once a full batch has been collected.
            if (count($fileQueue) >= $config['batch_size']) {
                processFileBatch($fileQueue, $ossLinks, $config);
                $processedFiles += count($fileQueue);
                $fileQueue = [];

                if ($config['show_progress']) {
                    echo "已处理: $processedFiles 文件, 找到: " . count($ossLinks) . " 个链接\r";
                }
            }
        }
    }

    // Files left over after the last full batch.
    if (!empty($fileQueue)) {
        processFileBatch($fileQueue, $ossLinks, $config);
        $processedFiles += count($fileQueue);
    }

    saveResults($ossLinks, $config['output_file']);

    $endTime = microtime(true);
    $elapsed = round($endTime - $startTime, 2);

    echo "\n完成! 共处理 {$processedFiles} 个文件, 找到 " . count($ossLinks) . " 个.oss链接\n";
    echo "耗时: {$elapsed} 秒\n";
    echo "结果已保存到: {$config['output_file']}\n";
}

/**
 * Tells whether a directory must be skipped.
 *
 * A directory is skipped when any segment of its path is exactly equal to one
 * of the ignored names. Whole segments are compared (not substrings), so
 * "vendor" does not accidentally exclude a directory such as "myvendors".
 *
 * @param string $dirPath    Directory path; "/" and "\" are both treated as separators.
 * @param array  $ignoreDirs Directory names to skip.
 * @return bool True when the directory should not be scanned.
 */
function shouldIgnoreDir($dirPath, $ignoreDirs)
{
    $segments = preg_split('#[\\\\/]+#', (string) $dirPath, -1, PREG_SPLIT_NO_EMPTY);
    if ($segments === false) {
        return false;
    }

    foreach ($segments as $segment) {
        if (in_array($segment, $ignoreDirs, true)) {
            return true;
        }
    }

    return false;
}

/**
 * Normalises a matched URL by removing the escaping and markup residue that
 * the permissive URL pattern drags along.
 *
 * The replacements run strictly in the listed order (str_replace() applies
 * array searches one after another on the running result).
 *
 * @param string $url Matched URL, already HTML-entity decoded.
 * @return string The cleaned URL.
 */
function cleanOssUrl($url)
{
    // JSON-escaped slashes ("\/") become plain slashes.
    $url = str_replace('\/', '/', $url);

    // Drop trailing single quotes and commas.
    $url = rtrim($url, "',");

    $replacements = [
        '"'                                   => '',
        '</span>'                             => '',
        '<span>'                              => '',
        '</p>'                                => '',
        '<p'                                  => '',
        '<p>'                                 => '',
        '<br'                                 => '',
        '<br>'                                => '',
        '&amp;'                               => '&',
        '"}'                                  => '',
        '}'                                   => '',
        '>'                                   => '',
        '//付费合集标签'                        => '',
        '?x-oss-process=image/resize'         => '',
        '?x-oss-process=image%2Fformat%2Cjpg' => '',
        "';"                                  => '',
        '\\'                                  => '',
        '?x-oss-process=video/snapshot'       => '',
        "'];"                                 => '',
        "']"                                  => '',
    ];

    return str_replace(array_keys($replacements), array_values($replacements), $url);
}

/**
 * Scans a batch of files and appends every ".oss" link found to $ossLinks.
 *
 * A URL is kept when it contains ".oss" and does not contain ".apk". The
 * stored link is the cleaned URL: previously the cleaned value was computed
 * and then thrown away, so the report contained the raw match including
 * trailing quotes, tags and image-processing query fragments.
 *
 * @param array $filePaths Paths of the files to scan.
 * @param array $ossLinks  Result list, extended in place with
 *                         ['file' => path, 'link' => url] entries.
 * @param array $config    Run configuration (not read by this function).
 * @return void
 */
function processFileBatch($filePaths, &$ossLinks, $config)
{
    // http(s) URL whose "//" may be written as "\/\/"; the match stops at
    // whitespace, a comma or a double quote.
    $urlPattern = '/(https?:\\\\?\/\\\\?\/[^\s,"]+)/i';

    foreach ($filePaths as $filePath) {
        // file_get_contents() returns false on failure instead of throwing.
        $content = is_readable($filePath) ? file_get_contents($filePath) : false;
        if ($content === false) {
            error_log("Error processing file {$filePath}: unable to read file");
            continue;
        }

        if (!preg_match_all($urlPattern, $content, $matches, PREG_SET_ORDER)) {
            continue;
        }

        foreach ($matches as $match) {
            $url = html_entity_decode($match[1]);

            if (strpos($url, '.oss') === false) {
                continue;
            }
            if (strpos($url, '.apk') !== false) {
                continue;
            }

            $ossLinks[] = [
                'file' => $filePath,
                'link' => cleanOssUrl($url),
            ];
        }
    }
}

/**
 * Writes the collected links to the report file, one "File/Link" record per
 * entry followed by a separator line. A failed write is logged.
 *
 * @param array  $ossLinks   Entries with 'file' and 'link' keys.
 * @param string $outputFile Path of the report file (overwritten).
 * @return void
 */
function saveResults($ossLinks, $outputFile)
{
    $output = '';

    foreach ($ossLinks as $link) {
        $output .= "File: " . $link['file'] . "\n";
        $output .= "Link: " . $link['link'] . "\n";
        $output .= "-------------------------\n";
    }

    if (file_put_contents($outputFile, $output) === false) {
        error_log("Error writing results to {$outputFile}");
    }
}

// Run the scan.
findOssLinksOptimized($config);