<?php
namespace app\common;
use ZipArchive;
use SimpleXMLElement;
use PhpOffice\PhpSpreadsheet\IOFactory;
/**
 * Extracts the pictures that WPS Office stores for its DISPIMG() cell
 * function (part `xl/cellimages.xml`) and saves them below the public
 * `uploads/wps_images/<Ymd>/` directory.
 */
class ExtractWPS
{
    /**
     * Extensions that may be written below the public web root.
     *
     * The extension comes from an entry name inside the (untrusted) workbook,
     * so anything outside this list is skipped; otherwise a crafted file could
     * drop e.g. a `.php` file into a web-served directory.
     */
    private const ALLOWED_IMAGE_EXTENSIONS = [
        'png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp', 'tif', 'tiff', 'emf', 'wmf',
    ];

    /**
     * Extracts the embedded WPS cell images of an Excel file.
     *
     * @param string $fileUrl      http(s) URL or local path of the workbook.
     * @param bool   $isReturnCell When true the result is keyed by cell
     *                             coordinate (active sheet only); otherwise it
     *                             is keyed by the image ID, i.e. the `name`
     *                             attribute of `cNvPr` that DISPIMG() refers to.
     * @return array<string, string> Map of cell coordinate or image ID to the
     *                               web path `/uploads/wps_images/<Ymd>/<file>`.
     * @throws \Exception When the file cannot be fetched or opened, or when the
     *                    WPS image parts are missing.
     */
    public static function extract(string $fileUrl, bool $isReturnCell = false): array
    {
        $tempPath = self::downloadFile($fileUrl);
        $zip = new ZipArchive();
        $zipOpened = false;

        try {
            if ($zip->open($tempPath) !== true) {
                throw new \Exception('无法打开文件: ' . $tempPath);
            }
            $zipOpened = true;

            $imageMap = self::collectImages($zip);
            if (!$isReturnCell) {
                return $imageMap;
            }

            return self::mapCellsToImages($tempPath, $imageMap);
        } finally {
            // Always release the archive and the temporary copy, including on
            // every failure path (close first: an open handle blocks unlink()
            // on Windows).
            if ($zipOpened) {
                $zip->close();
            }
            if (is_file($tempPath)) {
                @unlink($tempPath);
            }
        }
    }

    /**
     * Reads `xl/cellimages.xml`, resolves each picture through the .rels file
     * and writes the image bytes to the public upload directory.
     *
     * @return array<string, string> Image ID => web path.
     * @throws \Exception When a required part is missing or the target
     *                    directory cannot be created.
     */
    private static function collectImages(ZipArchive $zip): array
    {
        $cellImagesXml = $zip->getFromName('xl/cellimages.xml');
        if (!$cellImagesXml) {
            throw new \Exception('未找到 cellimages.xml,文件可能不是 WPS 图片格式');
        }
        $cellImages = new SimpleXMLElement($cellImagesXml);
        $namespaces = $cellImages->getNamespaces(true);

        [$relsBaseDir, $relsMap] = self::readRelationships($zip);

        // Evaluate the date once so the directory on disk and the returned
        // web path cannot disagree when the call spans midnight.
        $day = date('Ymd');
        $saveDir = self::ensureDirectory(public_path('uploads/wps_images/' . $day));

        $xdrNs = $namespaces['xdr'] ?? '';
        $imageMap = [];

        foreach ($cellImages->children($namespaces['etc'] ?? '') as $item) {
            foreach ($item->children($xdrNs) as $pic) {
                $picParts = $pic->children($xdrNs);

                $nvPicPr = $picParts->nvPicPr ?? null;
                if (!$nvPicPr) {
                    continue;
                }

                // Prefer a namespace-aware search; fall back to the direct child.
                $matches = $nvPicPr->xpath('.//xdr:cNvPr');
                $cNvPr = $matches[0] ?? null;
                if (!$cNvPr) {
                    $cNvPr = $nvPicPr->cNvPr ?? null;
                }
                if (!$cNvPr) {
                    continue;
                }

                $imgId = (string) $cNvPr['name'];
                if ($imgId === '') {
                    continue;
                }

                $blipFill = $picParts->blipFill ?? null;
                if (!$blipFill) {
                    continue;
                }
                $blip = $blipFill->children($namespaces['a'] ?? '')->blip ?? null;
                if (!$blip) {
                    continue;
                }

                $rId = (string) $blip->attributes($namespaces['r'] ?? '')['embed'];
                if ($rId === '' || !isset($relsMap[$rId])) {
                    continue;
                }

                $entry = self::readImageEntry($zip, $relsBaseDir, $relsMap[$rId]);
                if ($entry === null) {
                    continue;
                }
                [$entryName, $imageData] = $entry;

                $ext = strtolower(pathinfo($entryName, PATHINFO_EXTENSION));
                if ($ext === '') {
                    $ext = 'jpg';
                }
                if (!in_array($ext, self::ALLOWED_IMAGE_EXTENSIONS, true)) {
                    // Never write attacker-chosen extensions into the web root.
                    continue;
                }

                $fileName = uniqid('wps_', true) . '.' . $ext;
                if (file_put_contents($saveDir . $fileName, $imageData) === false) {
                    // Do not advertise a path that was never written.
                    continue;
                }
                $imageMap[$imgId] = '/uploads/wps_images/' . $day . '/' . $fileName;
            }
        }

        return $imageMap;
    }

    /**
     * Loads the relationship part that maps rIds to image targets.
     *
     * @return array{0: string, 1: array<string, string>} Directory of the part
     *         owning the relationships, and the map rId => Target.
     * @throws \Exception When neither known .rels part exists.
     */
    private static function readRelationships(ZipArchive $zip): array
    {
        $candidates = [
            'xl' => 'xl/_rels/cellimages.xml.rels',
            'xl/drawings' => 'xl/drawings/_rels/drawing1.xml.rels',
        ];

        foreach ($candidates as $baseDir => $relsName) {
            $relsXml = $zip->getFromName($relsName);
            if (!$relsXml) {
                continue;
            }

            $targets = [];
            foreach ((new SimpleXMLElement($relsXml))->children() as $rel) {
                $targets[(string) $rel['Id']] = (string) $rel['Target'];
            }

            return [$baseDir, $targets];
        }

        throw new \Exception('未找到图片关系文件 (.rels)');
    }

    /**
     * Reads the zip entry a relationship target points to.
     *
     * The historical lookup (`xl/` + target) is tried first so existing files
     * behave exactly as before; after that the target is resolved relative to
     * the owning part (handles `../media/x.png` and absolute `/xl/media/x.png`).
     *
     * @return array{0: string, 1: string}|null Entry name and its bytes, or
     *         null when no non-empty entry was found.
     */
    private static function readImageEntry(ZipArchive $zip, string $baseDir, string $target): ?array
    {
        $isAbsolute = strncmp($target, '/', 1) === 0;
        $names = array_unique([
            'xl/' . ltrim($target, '/'),
            self::normalizePartName($isAbsolute ? $target : $baseDir . '/' . $target),
        ]);

        foreach ($names as $name) {
            if ($name === '') {
                continue;
            }
            $data = $zip->getFromName($name);
            if ($data !== false && $data !== '') {
                return [$name, $data];
            }
        }

        return null;
    }

    /**
     * Collapses `.`, `..`, empty segments and backslashes in a part name.
     */
    private static function normalizePartName(string $path): string
    {
        $segments = [];
        foreach (explode('/', str_replace('\\', '/', $path)) as $segment) {
            if ($segment === '' || $segment === '.') {
                continue;
            }
            if ($segment === '..') {
                array_pop($segments);
                continue;
            }
            $segments[] = $segment;
        }

        return implode('/', $segments);
    }

    /**
     * Maps cells of the active sheet whose raw value is a DISPIMG formula to
     * the extracted image paths.
     *
     * @param array<string, string> $imageMap Image ID => web path.
     * @return array<string, string> Cell coordinate => web path.
     */
    private static function mapCellsToImages(string $tempPath, array $imageMap): array
    {
        if ($imageMap === []) {
            // Nothing to map; skip the expensive workbook load.
            return [];
        }

        $spreadsheet = IOFactory::load($tempPath);
        try {
            $sheet = $spreadsheet->getActiveSheet();
            $cellToImage = [];

            foreach ($sheet->getCoordinates() as $cellAddress) {
                // getValue() returns the raw, uncalculated cell content, so the
                // formula text _xlfn.DISPIMG("ID_xxx",1) is still visible here.
                // A calculated value would be #NAME? and could not be matched.
                $value = $sheet->getCell($cellAddress)->getValue();
                if (!is_string($value)) {
                    continue;
                }
                if (preg_match('/_xlfn\.DISPIMG\("([^"]+)"/', $value, $match) !== 1) {
                    continue;
                }
                if (isset($imageMap[$match[1]])) {
                    $cellToImage[$cellAddress] = $imageMap[$match[1]];
                }
            }

            return $cellToImage;
        } finally {
            // Break the workbook's internal references so its memory can be freed.
            $spreadsheet->disconnectWorksheets();
        }
    }

    /**
     * Creates the directory if needed and returns it with a trailing separator.
     *
     * @throws \Exception When the directory cannot be created.
     */
    private static function ensureDirectory(string $dir): string
    {
        $dir = rtrim($dir, '/\\');
        // The second is_dir() covers a concurrent request creating it first.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            throw new \Exception('无法创建目录: ' . $dir);
        }

        return $dir . DIRECTORY_SEPARATOR;
    }

    /**
     * Downloads (http/https) or copies (local path) the workbook to a unique
     * temporary file below the runtime directory.
     *
     * @return string Path of the temporary copy; the caller must delete it.
     * @throws \Exception When the source cannot be read or the copy cannot be
     *                    written.
     */
    private static function downloadFile(string $fileUrl): string
    {
        $tempDir = self::ensureDirectory(runtime_path('temp_excel'));
        $tempPath = $tempDir . uniqid('wps_', true) . '.xlsx';

        if (preg_match('/^https?:\/\//i', $fileUrl) === 1) {
            // NOTE(review): if $fileUrl can come from end users this fetch is an
            // SSRF vector; restrict the allowed hosts at the call site.
            // Warnings are suppressed on purpose: failure is reported through
            // the exception below.
            $content = @file_get_contents($fileUrl);
            if ($content === false) {
                throw new \Exception('无法下载 Excel 文件: ' . $fileUrl);
            }
            if (file_put_contents($tempPath, $content) === false) {
                throw new \Exception('无法写入临时文件: ' . $tempPath);
            }

            return $tempPath;
        }

        if (!is_file($fileUrl)) {
            throw new \Exception('Excel 文件不存在: ' . $fileUrl);
        }
        if (!copy($fileUrl, $tempPath)) {
            throw new \Exception('无法复制 Excel 文件: ' . $fileUrl);
        }

        return $tempPath;
    }
}
// 3. Usage (使用方法): example method; it must live inside the class that imports the spreadsheet.
/**
 * Builds a map of row number => image web path for one worksheet.
 *
 * Two sources are combined:
 *  1. Drawings from the sheet's drawing collection (in-memory and
 *     file-backed), saved below `uploads/excel_img/<Ymd>/`.
 *  2. WPS cell images: when any raw cell value contains `_xlfn.DISPIMG(` and
 *     a $fileUrl is given, ExtractWPS::extract() resolves them per cell.
 *
 * Only one image is kept per row (the last one wins), and a WPS image
 * replaces a drawing found on the same row. WPS extraction is best-effort:
 * any failure is logged through trace() and the drawings are still returned.
 *
 * @param mixed       $sheet   Object providing getDrawingCollection() and
 *                             toArray() (presumably a PhpSpreadsheet
 *                             Worksheet; confirm against the caller).
 * @param string|null $fileUrl URL or local path of the source workbook;
 *                             needed only for WPS cell images.
 * @return array<int, string> Row number => web path of the image.
 */
private static function extractImages1($sheet, ?string $fileUrl = null): array
{
    // Evaluate the date once so the directory and the returned paths agree.
    $relativeDir = 'uploads/excel_img/' . date('Ymd') . '/';
    $imgDir = rtrim(public_path(), '/\\') . '/' . $relativeDir;
    if (!is_dir($imgDir)) {
        mkdir($imgDir, 0777, true);
    }

    // Only these extensions are ever written; anything else falls back to png.
    $safeExtensions = ['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp', 'tif', 'tiff'];
    // GD rendering function => extension of the data it produces.
    $rendererExtensions = ['imagepng' => 'png', 'imagejpeg' => 'jpg', 'imagegif' => 'gif'];

    $images = [];

    // Native Excel drawings.
    foreach ($sheet->getDrawingCollection() as $drawing) {
        // Keep only the digits of the anchor coordinate, e.g. "B12" => 12.
        $row = (int) preg_replace('/\D/', '', $drawing->getCoordinates());

        if ($drawing instanceof MemoryDrawing) {
            // In-memory image: render it into an output buffer.
            $renderer = $drawing->getRenderingFunction();
            $ext = is_string($renderer) ? ($rendererExtensions[strtolower($renderer)] ?? 'png') : 'png';

            ob_start();
            try {
                $renderer($drawing->getImageResource());
            } finally {
                // Close the buffer even if rendering throws.
                $imageData = ob_get_clean();
            }

            $fileName = uniqid('', true) . '.' . $ext;
            $saved = is_string($imageData)
                && $imageData !== ''
                && file_put_contents($imgDir . $fileName, $imageData) !== false;
        } elseif ($drawing instanceof Drawing) {
            // File-backed image: copy it, keeping its real extension when safe.
            $source = $drawing->getPath();
            $ext = strtolower(pathinfo($source, PATHINFO_EXTENSION));
            if (!in_array($ext, $safeExtensions, true)) {
                $ext = 'png';
            }

            $fileName = uniqid('', true) . '.' . $ext;
            $saved = copy($source, $imgDir . $fileName);
        } else {
            continue;
        }

        // Map the row only when the file was actually written.
        if ($saved) {
            $images[$row] = '/' . $relativeDir . $fileName;
        }
    }

    // WPS cell images.
    try {
        $hasDispImg = false;
        if ($fileUrl !== null && $fileUrl !== '') {
            // toArray(nullValue, calculateFormulas, formatData, returnCellRef):
            // formulas are deliberately not calculated so the raw DISPIMG
            // formula text stays visible.
            foreach ($sheet->toArray(null, false, false, false) as $rowValues) {
                foreach ($rowValues as $cellValue) {
                    if (is_string($cellValue) && str_contains($cellValue, '_xlfn.DISPIMG(')) {
                        $hasDispImg = true;
                        break 2;
                    }
                }
            }
        }

        if ($hasDispImg) {
            foreach (ExtractWPS::extract($fileUrl, true) as $cellAddress => $path) {
                if (preg_match('/\d+/', $cellAddress, $m)) {
                    $images[(int) $m[0]] = $path;
                }
            }
        }
    } catch (\Throwable $e) {
        // Best-effort: log the failure and keep the drawings found so far.
        trace('WPS 图片提取失败: ' . $e->getMessage(), 'error');
    }

    return $images;
}