当前位置: 首页 > news >正文

PHP大文件处理与流式上传技术

PHP大文件处理与流式上传技术

处理大文件是Web开发中的挑战。PHP的默认配置限制了上传大小和内存使用。今天说说大文件处理和流式上传的实现方案。

PHP 的上传大小、请求体大小、执行时间和内存上限都由 php.ini 中的配置项决定,处理大文件之前需要先调整这些限制。

```ini
; php.ini 配置
; 注意:post_max_size 需要大于 upload_max_filesize,否则请求体会先被截断
upload_max_filesize = 200M
post_max_size = 200M
max_execution_time = 300
memory_limit = 256M
```

分片上传是大文件上传的标准方案。前端将文件切分成多个小片段,后端逐个接收,最后合并。

```php
/**
 * Receives a file as a series of numbered chunks and assembles them into a
 * single file once every chunk has arrived.
 *
 * Chunks are stored as `<tempDir>/<file_id>/<chunk_index>` and merged in
 * ascending index order into `<uploadDir>/<random-name>[.<ext>]`.
 *
 * NOTE(review): the upload directory should live outside the web root (or have
 * script execution disabled), because the stored file keeps the client-supplied
 * extension.
 */
class ChunkedUploadHandler
{
    /** Allowed shape of a client-supplied file ID; it is used as a directory name. */
    private const FILE_ID_PATTERN = '/\A[A-Za-z0-9_-]{1,128}\z/';

    /** Allowed shape of a file extension taken from the client-supplied name. */
    private const EXTENSION_PATTERN = '/\A[A-Za-z0-9]{1,16}\z/';

    private string $uploadDir;
    private string $tempDir;

    /**
     * @param string $uploadDir Directory that receives merged files; chunks go to `<uploadDir>/temp`.
     *
     * @throws RuntimeException When one of the directories cannot be created.
     */
    public function __construct(string $uploadDir = '/var/www/uploads')
    {
        $this->uploadDir = rtrim($uploadDir, '/');
        $this->tempDir = $this->uploadDir . '/temp';
        foreach ([$this->uploadDir, $this->tempDir] as $dir) {
            // The trailing is_dir() covers the race where another request created it first.
            if (!is_dir($dir) && !mkdir($dir, 0755, true) && !is_dir($dir)) {
                throw new RuntimeException("无法创建目录: {$dir}");
            }
        }
    }

    /**
     * Stores the chunk carried in the request body and merges the file when complete.
     *
     * Recognised keys of $params: `file_id` (required), `chunk_index` (default 0),
     * `total_chunks` (default 1), `original_name` (default 'unknown') and
     * `total_size` (default 0, meaning "do not verify").
     *
     * @param array $params Upload metadata supplied by the client.
     *
     * @return array Either `['error' => string]`, a progress report
     *               (`success`, `received`, `total`, `progress`), or the merge
     *               result (`success`, `file_path`, `file_size`, `original_name`).
     */
    public function handleChunk(array $params): array
    {
        $rawId = $params['file_id'] ?? '';
        $fileId = is_int($rawId) ? (string)$rawId : $rawId;
        $chunkIndex = (int)($params['chunk_index'] ?? 0);
        $totalChunks = (int)($params['total_chunks'] ?? 1);
        $originalName = $params['original_name'] ?? 'unknown';
        $totalSize = (int)($params['total_size'] ?? 0);

        if (!is_string($fileId) || $fileId === '') {
            return ['error' => '缺少文件ID'];
        }
        // The ID becomes a path segment, so anything that could escape tempDir is rejected.
        if (preg_match(self::FILE_ID_PATTERN, $fileId) !== 1) {
            return ['error' => '文件ID不合法'];
        }
        if ($totalChunks < 1 || $chunkIndex < 0 || $chunkIndex >= $totalChunks) {
            return ['error' => '分片参数不合法'];
        }
        if (!is_string($originalName)) {
            $originalName = 'unknown';
        }

        $chunkDir = $this->tempDir . '/' . $fileId;
        if (!is_dir($chunkDir) && !mkdir($chunkDir, 0755, true) && !is_dir($chunkDir)) {
            return ['error' => '无法创建分片目录'];
        }

        // Stream the request body to disk instead of buffering the whole chunk in memory.
        $input = fopen('php://input', 'rb');
        $output = fopen($chunkDir . '/' . $chunkIndex, 'wb');
        if ($input === false || $output === false) {
            if ($input !== false) {
                fclose($input);
            }
            if ($output !== false) {
                fclose($output);
            }
            return ['error' => '保存分片失败'];
        }
        $copied = stream_copy_to_stream($input, $output);
        fclose($input);
        fclose($output);
        if ($copied === false) {
            return ['error' => '保存分片失败'];
        }

        // glob() returns false on error; treat that as "nothing received".
        $receivedChunks = count(glob($chunkDir . '/*') ?: []);

        if ($receivedChunks >= $totalChunks) {
            return $this->mergeChunks($fileId, $originalName, $totalChunks, $totalSize);
        }

        return [
            'success' => true,
            'received' => $receivedChunks,
            'total' => $totalChunks,
            'progress' => round($receivedChunks / $totalChunks * 100, 1) . '%',
        ];
    }

    /**
     * Concatenates chunks 0..$totalChunks-1 in index order into the final file.
     *
     * @param string $fileId       Validated upload identifier.
     * @param string $originalName Client-side file name; only its extension is reused.
     * @param int    $totalChunks  Number of chunks the file consists of.
     * @param int    $expectedSize Expected total size in bytes, or 0 to skip the check.
     *
     * @return array Merge result, or `['error' => string]` when a chunk is missing,
     *               the size does not match, or the merge fails.
     */
    private function mergeChunks(string $fileId, string $originalName, int $totalChunks, int $expectedSize): array
    {
        $chunkDir = $this->tempDir . '/' . $fileId;

        // Walk the indices explicitly: sorting glob() results would compare full
        // path strings, which does not give numeric chunk order.
        clearstatcache();
        $chunkFiles = [];
        $actualSize = 0;
        for ($index = 0; $index < $totalChunks; $index++) {
            $chunkFile = $chunkDir . '/' . $index;
            if (!is_file($chunkFile)) {
                return ['error' => "缺少分片: {$index}"];
            }
            $chunkFiles[] = $chunkFile;
            $actualSize += (int)filesize($chunkFile);
        }

        // Verify before merging so the chunks survive a mismatch and can be re-sent.
        if ($expectedSize > 0 && $actualSize !== $expectedSize) {
            return ['error' => '文件大小校验失败'];
        }

        $ext = pathinfo($originalName, PATHINFO_EXTENSION);
        if (preg_match(self::EXTENSION_PATTERN, $ext) !== 1) {
            $ext = '';
        }
        $newName = bin2hex(random_bytes(16)) . ($ext !== '' ? ".{$ext}" : '');
        $destPath = $this->uploadDir . '/' . $newName;

        $destFile = fopen($destPath, 'wb');
        if ($destFile === false) {
            return ['error' => '无法创建目标文件'];
        }

        $merged = true;
        foreach ($chunkFiles as $chunkFile) {
            $source = fopen($chunkFile, 'rb');
            if ($source === false) {
                $merged = false;
                break;
            }
            $copied = stream_copy_to_stream($source, $destFile);
            fclose($source);
            if ($copied === false) {
                $merged = false;
                break;
            }
        }
        fclose($destFile);

        if (!$merged) {
            // Drop the partial file but keep the chunks for a retry.
            unlink($destPath);
            return ['error' => '合并分片失败'];
        }

        // Remove every file in the chunk directory so rmdir() can succeed.
        foreach (glob($chunkDir . '/*') ?: [] as $leftover) {
            if (is_file($leftover)) {
                unlink($leftover);
            }
        }
        rmdir($chunkDir);

        clearstatcache(true, $destPath);

        return [
            'success' => true,
            'file_path' => $newName,
            'file_size' => filesize($destPath),
            'original_name' => $originalName,
        ];
    }

    /**
     * Reports how many chunks have been stored for an upload.
     *
     * @param string $fileId Upload identifier.
     *
     * @return array `received`, `total` and `progress` when nothing is stored (or the
     *               ID is invalid); otherwise `received` and a textual `progress`.
     */
    public function getUploadProgress(string $fileId): array
    {
        if (preg_match(self::FILE_ID_PATTERN, $fileId) !== 1) {
            return ['received' => 0, 'total' => 0, 'progress' => '0%'];
        }

        $chunkDir = $this->tempDir . '/' . $fileId;
        if (!is_dir($chunkDir)) {
            return ['received' => 0, 'total' => 0, 'progress' => '0%'];
        }

        $received = count(glob($chunkDir . '/*') ?: []);
        return ['received' => $received, 'progress' => "{$received}个分片已接收"];
    }
}
?>
```

流式处理大文件的核心是逐块处理,不一次性加载到内存。

```php
/**
 * Helpers for processing files piece by piece so the whole file is never held in memory.
 */
class LargeFileProcessor
{
    /**
     * Reads a file line by line and passes each trimmed line to a callback.
     *
     * @param string   $path         File to read.
     * @param callable $lineCallback Invoked as `$lineCallback(string $trimmedLine, int $lineNumber)`;
     *                               line numbers start at 1.
     *
     * @return int Number of lines read.
     *
     * @throws RuntimeException When the file cannot be opened.
     */
    public function processLargeFile(string $path, callable $lineCallback): int
    {
        $handle = fopen($path, 'r');
        if ($handle === false) {
            throw new RuntimeException("无法打开文件: $path");
        }

        $lineCount = 0;
        try {
            while (($line = fgets($handle)) !== false) {
                $lineCount++;
                $lineCallback(trim($line), $lineCount);
            }
        } finally {
            // Close the handle even when the callback throws.
            fclose($handle);
        }

        return $lineCount;
    }

    /**
     * Reads a file in fixed-size binary chunks and passes each chunk to a callback.
     *
     * The chunk size has no default: a default on a parameter that precedes a
     * required one can never be used and is deprecated since PHP 8.0, so callers
     * already had to pass all three arguments.
     *
     * @param string   $path          File to read.
     * @param int      $chunkSize     Maximum number of bytes per read; must be at least 1.
     * @param callable $chunkCallback Invoked as `$chunkCallback(string $chunk, int $totalBytesReadSoFar)`.
     *
     * @return int Total number of bytes read.
     *
     * @throws InvalidArgumentException When $chunkSize is smaller than 1.
     * @throws RuntimeException         When the file cannot be opened.
     */
    public function processInChunks(string $path, int $chunkSize, callable $chunkCallback): int
    {
        if ($chunkSize < 1) {
            throw new InvalidArgumentException('chunkSize must be at least 1');
        }

        $handle = fopen($path, 'rb');
        if ($handle === false) {
            throw new RuntimeException("无法打开文件: $path");
        }

        $totalRead = 0;
        try {
            while (!feof($handle)) {
                $chunk = fread($handle, $chunkSize);
                // fread() yields false on failure and '' at end of data; stop on both.
                if ($chunk === false || $chunk === '') {
                    break;
                }

                $totalRead += strlen($chunk);
                $chunkCallback($chunk, $totalRead);
            }
        } finally {
            fclose($handle);
        }

        return $totalRead;
    }

    /**
     * Copies the lines accepted by a filter from one file to another.
     *
     * Each line is trimmed before it is tested, and accepted lines are written
     * in trimmed form followed by "\n".
     *
     * @param string   $source File to read.
     * @param string   $dest   File to write; it is truncated if it exists.
     * @param callable $filter Invoked as `$filter(string $trimmedLine)`; a truthy result keeps the line.
     *
     * @throws RuntimeException When either file cannot be opened.
     */
    public function filterLargeFile(string $source, string $dest, callable $filter): void
    {
        // Open the source first so an unreadable source never truncates the destination.
        $sourceHandle = fopen($source, 'r');
        if ($sourceHandle === false) {
            throw new RuntimeException("文件操作失败");
        }

        $destHandle = fopen($dest, 'w');
        if ($destHandle === false) {
            fclose($sourceHandle);
            throw new RuntimeException("文件操作失败");
        }

        try {
            while (($line = fgets($sourceHandle)) !== false) {
                $line = trim($line);
                if ($filter($line)) {
                    fwrite($destHandle, $line . "\n");
                }
            }
        } finally {
            fclose($sourceHandle);
            fclose($destHandle);
        }
    }
}

// Demo: write a 100,000-line sample file, then process it line by line.
$processor = new LargeFileProcessor();

// Generate the large sample file.
$largeFile = '/tmp/large_data.txt';
$handle = fopen($largeFile, 'w');
if ($handle === false) {
    // Without this check a failed fopen() would reach fwrite() with `false`.
    throw new RuntimeException("无法打开文件: $largeFile");
}
for ($i = 0; $i < 100000; $i++) {
    fwrite($handle, "行_{$i}," . rand(1000, 9999) . "," . bin2hex(random_bytes(8)) . "\n");
}
fclose($handle);

// Process line by line, reporting progress every 25,000 lines.
$count = $processor->processLargeFile($largeFile, function ($line, $num) {
    if ($num % 25000 === 0) {
        echo "已处理 {$num} 行\n";
    }
});
echo "共处理 {$count} 行\n";
?>
```

生成器在处理大数据集时能显著减少内存占用。

```php
/**
 * Generator-based sources for iterating over large data sets one record at a time.
 */
class LargeDataSet
{
    /**
     * Lazily reads a CSV file.
     *
     * The first value yielded is the header row as a plain list; every later
     * value is one data row as an array keyed by those headers. Blank lines are
     * skipped. If the file cannot be opened or is empty, nothing is yielded.
     *
     * @param string $path CSV file to read.
     *
     * @return Generator Header list first, then one associative array per data row.
     *
     * @throws ValueError On PHP 8+, when a non-blank row has a different number of
     *                    fields than the header (raised by array_combine()).
     */
    public function readCsv(string $path): Generator
    {
        $handle = fopen($path, 'r');
        if ($handle === false) {
            return;
        }

        try {
            // Arguments are spelled out (with their long-standing default values)
            // because relying on the implicit escape default is deprecated in PHP 8.4.
            $headers = fgetcsv($handle, 0, ',', '"', '\\');
            if ($headers === false) {
                return;
            }

            yield $headers;

            while (($row = fgetcsv($handle, 0, ',', '"', '\\')) !== false) {
                // fgetcsv() reports a blank line as an array holding a single null.
                if ($row === [null]) {
                    continue;
                }
                yield array_combine($headers, $row);
            }
        } finally {
            // Runs on normal completion, on the early return above, and when the
            // consumer abandons the generator before reaching the end.
            fclose($handle);
        }
    }

    /**
     * Lazily produces $count sample records.
     *
     * @param int $count Number of records to generate.
     *
     * @return Generator Arrays with the keys `id` (1-based), `name`, `value`
     *                   (random, 1..1000) and `created` (formatted timestamp,
     *                   offset by the record index in seconds).
     */
    public function generateData(int $count): Generator
    {
        for ($i = 0; $i < $count; $i++) {
            yield [
                'id' => $i + 1,
                'name' => "Item_{$i}",
                'value' => rand(1, 1000),
                'created' => date('Y-m-d H:i:s', time() + $i),
            ];
        }
    }
}

// Demo: write a 50,000-row CSV file, then count its data rows through the generator.
$csvFile = '/tmp/large.csv';
$fp = fopen($csvFile, 'w');
if ($fp === false) {
    // Without this check a failed fopen() would reach fputcsv() with `false`.
    throw new RuntimeException("无法打开文件: $csvFile");
}
// Delimiter, enclosure and escape are passed explicitly (same values as the
// long-standing defaults) because the implicit escape default is deprecated in PHP 8.4.
fputcsv($fp, ['id', 'name', 'value', 'created'], ',', '"', '\\');
for ($i = 0; $i < 50000; $i++) {
    fputcsv($fp, [$i + 1, "Item_{$i}", rand(1, 1000), date('Y-m-d')], ',', '"', '\\');
}
fclose($fp);

$memoryStart = memory_get_usage(true);
$reader = new LargeDataSet();

$count = 0;
foreach ($reader->readCsv($csvFile) as $row) {
    // The first yielded value is the header list, which has no 'id' key, so it is not counted.
    if (is_array($row) && isset($row['id'])) {
        $count++;
    }
}

echo "读取 {$count} 行,内存使用: " . (memory_get_usage(true) - $memoryStart) / 1024 . " KB\n";
?>
```

大文件处理的关键是避免一次性加载到内存。分片上传适合网络传输,流式读取适合本地处理,生成器适合大数据集遍历。PHP的流式处理能力配合生成器,可以处理任意大小的文件。

http://www.rkmt.cn/news/1456975.html

相关文章:

  • 2026年湖南正规职业高中推荐:首批入围院校盘点 - 优质品牌商家
  • 终极指南:3步快速搞定视频自动字幕生成,免费开源神器VideoSrt完整教程
  • 别只盯着算法!手把手教你用Python复现LINE论文中的边缘采样(Alias Method)与负采样优化
  • 智能任务超时熔断机制缺失导致成本飙升217%?5个生产环境真实Case与实时决策树模型
  • DIY蓝牙耳机改造指南:从有线到无线的核心步骤与避坑要点
  • 5步告别激活烦恼:KMS_VL_ALL_AIO智能激活脚本完全指南
  • 如何将任天堂Joy-Con变成Windows上的Xbox手柄?XJoy开源方案完全指南
  • 告别Kali黑屏噩梦:深度解析LightDM/GDM3显示管理器冲突与Xorg配置修复
  • 基于Arduino与GRBL的桌面数控写字机DIY全攻略
  • 3个核心技巧:如何用SI6 Networks IPv6 Toolkit提升网络安全评估效率
  • 终极项目管理指南:用GanttProject实现高效项目规划与跟踪
  • c# solidworks 自动标注折弯7 图可视化,清晰定义,画点改画线
  • Python为何成为TVA的神经与感官系统(9)
  • 【限时解密】头部金融科技公司AI任务编排内参(含12类异构API适配器源码+任务血缘图谱生成脚本)
  • 掌握智能窗口管理:解锁高效工作流的专业窗口强制调整工具
  • 线上内存溢出?一次关于 Pandas 大数据量下 Python GC 机制的极限调优实战
  • Windows 11终极优化指南:用Win11Debloat一键提升51%系统性能,彻底告别卡顿与隐私泄露
  • 一键备份QQ空间回忆:GetQzonehistory完整使用指南
  • 5步轻松掌握fanqienovel-downloader:打造永不消失的个人小说图书馆
  • HBase与Hadoop:基于什么开发?深度剖析与架构图
  • 2026苏州防水维修哪家好?权威靠谱防水公司推荐|全屋漏水根治测评 - 苏易修缮
  • 【2024最稀缺整合方案】:基于LLM+GraphDB的社区智能治理系统,已验证提升用户留存率41.7%
  • 终极指南:如何用FanControl免费实现Windows风扇智能控制
  • B站缓存视频转换神器:3步实现m4s到MP4的无损快速转换
  • 【MySQL高阶】20.InnoDB 磁盘文件
  • TestDisk与PhotoRec:开源数据恢复双雄的终极完整指南
  • 什么叫无状态
  • 三星手机怎么连接 Windows 电脑?5 种实用连接方法
  • 今日开源[第5期]Headroom - zhang
  • 3分钟掌握IDM激活脚本:开源工具实现永久免费下载加速