💎一站式轻松地调用各大LLM模型接口,支持GPT4、智谱、豆包、星火、月之暗面及文生图、文生视频 广告
**数据比较正规的**

~~~
<?php
/**
 * Parse labelled invoice text ("label:value" pairs) into an associative array.
 *
 * A value runs from its label's colon up to the next known label, the end of
 * the line, or the end of the input, so the fields may be written one per line
 * or all on a single line. Both the ASCII colon and the full-width colon are
 * accepted after a label.
 *
 * @param string $content Raw invoice text (UTF-8).
 * @return array Keys: invoice_title, tax_number, address, phone, bank_name,
 *               bank_account. A field whose label is not found is ''.
 */
function parseLabeledInvoice(string $content): array
{
    // Output key => label exactly as it appears in the text.
    $labels = [
        'invoice_title' => '名称',
        'tax_number'    => '税号',
        'address'       => '单位地址',
        'phone'         => '电话',
        'bank_name'     => '开户银行',
        'bank_account'  => '银行账户',
    ];

    $quoted = array_map(function ($label) {
        return preg_quote($label, '/');
    }, $labels);

    // Lookahead that ends a value: the next "label:" (possibly after
    // whitespace), a line break, or the end of the input.
    $stop = '(?=\s*(?:' . implode('|', $quoted) . ')\h*[::]|\R|$)';

    $info = [];
    foreach ($quoted as $key => $label) {
        // Lazy capture, so a value never runs into the following field.
        $pattern = '/' . $label . '\h*[::]\h*(.*?)' . $stop . '/u';
        $info[$key] = preg_match($pattern, $content, $m) === 1 ? trim($m[1]) : '';
    }

    return $info;
}

$content = "名称:山东XXX管理有限公司 税号:91370112MAXXX53G46 单位地址:山东省济南市历城区XXXXB座201室 电话:13850000399 开户银行:济南XXXXX山支行 银行账户:2050000000000010537";

// Build the invoice information array.
$invoiceInfo = parseLabeledInvoice($content);

// Print the result.
echo "<pre>";
print_r($invoiceInfo);
echo "</pre>";

// Alternatively, output it as JSON:
// echo json_encode($invoiceInfo, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);
?>
~~~

**数据比较杂乱的**

~~~
<?php
/**
 * Parse unlabelled, run-together invoice text into an associative array.
 *
 * Expected field order: title, 18-character tax number, address, whitespace,
 * 11-digit mobile number, bank name, 16-20 digit bank account and, optionally,
 * whitespace followed by a 7-12 digit landline number.
 *
 * A single anchored pattern is tried first. If it does not match, the fields
 * are located one at a time, each searched only after the end of the previous
 * one; whatever cannot be located is returned as ''.
 *
 * @param string $content Raw invoice text (UTF-8).
 * @return array Keys: invoice_title, tax_number, address, phone, bank_name,
 *               bank_account, fixed_phone.
 */
function parseCompactInvoice(string $content): array
{
    $content = trim($content);

    // Built by concatenation rather than with the x modifier and inline "#"
    // comments: those comments end at a newline, so the pattern breaks if the
    // source is ever reflowed onto one line. [0-9] is spelled out because
    // \d also accepts non-ASCII digits when the u modifier is active.
    $pattern = '/^'
        . '(?<invoice_title>[^0-9]+?)'       // invoice title (no digits)
        . '(?<tax_number>[A-Z0-9]{18})'      // tax number (18 characters)
        . '(?<address>.+?)'                  // address
        . '\s'                               // whitespace between address and mobile
        . '(?<phone>1[3-9][0-9]{9})'         // mobile number (11 digits, starts with 1)
        . '(?<bank_name>[^0-9]+?)'           // bank name (no digits)
        . '(?<bank_account>[0-9]{16,20})'    // bank account (16-20 digits)
        . '(?:\s+(?<phone2>[0-9]{7,12}))?'   // optional landline after whitespace
        . '$/u';

    if (preg_match($pattern, $content, $m) === 1) {
        return [
            'invoice_title' => trim($m['invoice_title']),
            'tax_number'    => $m['tax_number'],
            'address'       => trim($m['address']),
            'phone'         => $m['phone'],
            'bank_name'     => trim($m['bank_name']),
            'bank_account'  => $m['bank_account'],
            'fixed_phone'   => $m['phone2'] ?? '',
        ];
    }

    // Fallback: step-by-step extraction.
    $info = [
        'invoice_title' => '',
        'tax_number'    => '',
        'address'       => '',
        'phone'         => '',
        'bank_name'     => '',
        'bank_account'  => '',
        'fixed_phone'   => '',
    ];

    // Byte offset just past the most recently located field. Every later
    // search starts here, so a pattern cannot match inside an earlier field
    // (for example a mobile-looking run of digits inside the tax number).
    $cursor = 0;

    // 1. Tax number (18 uppercase letters / digits); the title precedes it.
    $taxFound = preg_match('/[A-Z0-9]{18}/', $content, $tax, PREG_OFFSET_CAPTURE) === 1;
    if ($taxFound) {
        $info['tax_number'] = $tax[0][0];
        $info['invoice_title'] = trim(substr($content, 0, $tax[0][1]));
        $cursor = $tax[0][1] + strlen($tax[0][0]);
    }

    // 2. Mobile number (11 digits); the address lies between tax number and mobile.
    $phoneFound = preg_match('/1[3-9][0-9]{9}/', $content, $phone, PREG_OFFSET_CAPTURE, $cursor) === 1;
    if ($phoneFound) {
        $info['phone'] = $phone[0][0];
        if ($taxFound) {
            $info['address'] = trim(substr($content, $cursor, $phone[0][1] - $cursor));
        }
        $cursor = $phone[0][1] + strlen($phone[0][0]);
    }

    // 3. Bank account (16-20 digits); the bank name lies between mobile and account.
    if (preg_match('/[0-9]{16,20}/', $content, $account, PREG_OFFSET_CAPTURE, $cursor) === 1) {
        $info['bank_account'] = $account[0][0];
        if ($phoneFound) {
            $info['bank_name'] = trim(substr($content, $cursor, $account[0][1] - $cursor));
        }
        $cursor = $account[0][1] + strlen($account[0][0]);
    }

    // 4. Landline (7-12 digits) at the very end, preceded by whitespace.
    if (preg_match('/\s([0-9]{7,12})\s*$/', $content, $fixed, 0, $cursor) === 1) {
        $info['fixed_phone'] = $fixed[1];
    }

    return $info;
}

$content = "山东XXxX管理有限公司91370112MA3P000046山东省济XXXXXXXXXXXX座201室 13850000099济南XXXXX支行050000000205000010537 05320000957";

$invoiceInfo = parseCompactInvoice($content);

// Print the result as JSON.
header('Content-Type: application/json; charset=utf-8');
echo json_encode($invoiceInfo, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);

/*
Illustrative output. The values below are masked differently from $content
above, so they do not match it character for character:
{
    "invoice_title": "山东XXXX管理有限公司",
    "tax_number": "91370112XXXXXX53G46",
    "address": "山东省济南市历城XXXXXXXX201室",
    "phone": "13853000099",
    "bank_name": "济南农商XXXX山支行",
    "bank_account": "05000390000000010537",
    "fixed_phone": "05325000057"
}
*/
?>
~~~