memories/lib/Exif.php

464 lines
14 KiB
PHP
Raw Permalink Normal View History

2022-08-20 02:53:21 +00:00
<?php
2022-10-19 17:10:36 +00:00
2022-08-20 02:53:21 +00:00
declare(strict_types=1);
namespace OCA\Memories;
use OCA\Memories\AppInfo\Application;
use OCA\Memories\Service\BinExt;
2022-08-20 02:53:21 +00:00
use OCP\Files\File;
2022-10-19 17:10:36 +00:00
class Exif
{
/** MIME types removed from the editable list built by allowedEditMimetypes(). */
private const FORBIDDEN_EDIT_MIMES = ['image/bmp', 'image/x-dcraw', 'video/MP2T'];

/** Timeout handed to readOrTimeout(), which documents its unit as milliseconds. */
private const EXIFTOOL_TIMEOUT = 30000;

/** Arguments shared by every exiftool invocation that reads metadata as JSON. */
private const EXIFTOOL_ARGS = ['-api', 'QuickTimeUTC=1', '-n', '-U', '-json', '--b'];

/**
 * Opened instance of exiftool when running in command mode.
 *
 * @var null|false|resource process handle as returned by proc_open()
 */
private static $staticProc;

/** Pipes of the process in $staticProc, indexed by descriptor (0 stdin, 1 stdout, 2 stderr). */
private static ?array $staticPipes = null;

/** Set once starting the stay_open process has failed; no further attempts are made afterwards. */
private static bool $noStaticProc = false;
2022-10-19 17:10:36 +00:00
/**
 * Shut down the shared exiftool process, if any, and release its pipes.
 *
 * This is best-effort: any failure while closing is ignored, but the static
 * state is always reset so that a later call can start from scratch.
 */
public static function closeStaticExiftoolProc()
{
    try {
        // Close whatever pipes are still open; is_resource() is false for
        // handles that were already closed, so closing twice is harmless.
        foreach ((array) self::$staticPipes as $pipe) {
            if (\is_resource($pipe)) {
                fclose($pipe);
            }
        }

        if (\is_resource(self::$staticProc)) {
            proc_terminate(self::$staticProc);
            proc_close(self::$staticProc);
        }
    } catch (\Throwable $ex) {
        // Deliberately ignored. \Throwable (not \Exception) because PHP 8
        // reports invalid stream handles as TypeError.
    } finally {
        self::$staticProc = null;
        self::$staticPipes = null;
    }
}
2022-10-19 17:10:36 +00:00
/**
 * Replace the shared exiftool process with a fresh one.
 */
public static function restartStaticExiftoolProc()
{
    // The old process has to be torn down before a new one is ensured
    self::closeStaticExiftoolProc();
    self::ensureStaticExiftoolProc();
}
2022-10-19 17:10:36 +00:00
/**
 * Make sure a running stay_open exiftool process is available.
 *
 * Starts the process when there is none, or replaces it when the existing
 * one has exited. If the new process is not running shortly after being
 * started, a warning is logged and no further attempts are made; the
 * process handle is then left as null.
 */
public static function ensureStaticExiftoolProc()
{
    if (self::$noStaticProc) {
        return;
    }

    // Nothing to do while the current process is alive
    if (\is_resource(self::$staticProc) && proc_get_status(self::$staticProc)['running']) {
        return;
    }

    // Either never started or the previous process died. Close first so
    // that the pipes of a dead process are released and not just forgotten.
    self::closeStaticExiftoolProc();
    self::initializeStaticExiftoolProc();

    usleep(500000); // wait so that an immediate failure becomes visible

    // proc_open() yields false on failure, so check the handle before querying it
    if (!\is_resource(self::$staticProc) || !proc_get_status(self::$staticProc)['running']) {
        error_log('WARN: Failed to create stay_open exiftool process');
        self::$noStaticProc = true;
        self::closeStaticExiftoolProc();

        // Callers test the handle against null, so never leave false behind
        self::$staticProc = null;
        self::$staticPipes = null;
    }
}
2022-08-20 02:53:21 +00:00
/**
 * Get exif data as a JSON object from a Nextcloud file.
 *
 * Fields describing the file on disk are removed from the result, and date
 * fields that start with an all-zero date are dropped.
 *
 * @throws \Exception if no local path can be obtained for the file
 */
public static function getExifFromFile(File $file)
{
    try {
        $path = $file->getStorage()->getLocalFile($file->getInternalPath());
    } catch (\Throwable $ex) {
        // https://github.com/pulsejet/memories/issues/820
        // Keep the original failure attached as the previous exception
        throw new \Exception('Failed to get local file: '.$ex->getMessage(), 0, $ex);
    }

    if (!\is_string($path)) {
        throw new \Exception('Failed to get local file path');
    }

    $exif = self::getExifFromLocalPath($path);

    // We need to remove blacklisted fields to prevent leaking info
    unset(
        $exif['SourceFile'],
        $exif['FileName'],
        $exif['ExifToolVersion'],
        $exif['Directory'],
        $exif['FileSize'],
        $exif['FileModifyDate'],
        $exif['FileAccessDate'],
        $exif['FileInodeChangeDate'],
        $exif['FilePermissions'],
        $exif['ThumbnailImage']
    );

    // Ignore zero dates
    $dateFields = [
        'DateTimeOriginal',
        'SubSecDateTimeOriginal',
        'CreateDate',
        'ModifyDate',
        'TrackCreateDate',
        'TrackModifyDate',
        'MediaCreateDate',
        'MediaModifyDate',
    ];

    foreach ($dateFields as $field) {
        if (\array_key_exists($field, $exif) && \is_string($exif[$field]) && str_starts_with($exif[$field], '0000:00:00')) {
            unset($exif[$field]);
        }
    }

    return $exif;
}
/**
 * Get exif data as a JSON object from a local file path.
 *
 * Uses the shared stay_open process when one has been set up, otherwise
 * runs a separate exiftool process for this file.
 */
public static function getExifFromLocalPath(string $path)
{
    if (null !== self::$staticProc) {
        self::ensureStaticExiftoolProc();

        // Ensuring may have given up and cleared the handle, in which case
        // there are no pipes to talk to: check again before using them.
        if (\is_resource(self::$staticProc)) {
            return self::getExifFromLocalPathWithStaticProc($path);
        }
    }

    return self::getExifFromLocalPathWithSeparateProc($path);
}
2022-09-25 13:21:40 +00:00
/**
 * Parse date from exif format and throw error if invalid.
 *
 * The date is taken from DateTimeOriginal or CreateDate (CreateDate is
 * preferred for video MIME types). The timezone comes from
 * OffsetTimeOriginal, OffsetTime or LocationTZID; when none of them yields
 * a usable timezone the date is interpreted as UTC.
 *
 * @param array $exif exif data as an associative array
 *
 * @throws \Exception if there is no date, it cannot be parsed, it is
 *                    before 1800 A.D. or it is the 1904 placeholder date
 */
public static function parseExifDate(array $exif): \DateTime
{
    // Get date from exif
    $exifDate = $exif['DateTimeOriginal'] ?? $exif['CreateDate'] ?? null;

    // For videos, prefer CreateDate for timezone (QuickTimeUTC=1)
    // MIMEType may be absent, which simply means "not a video" here
    if (preg_match('/^video\/\w+/', (string) ($exif['MIMEType'] ?? ''))) {
        $exifDate = $exif['CreateDate'] ?? $exifDate;
    }

    // Check if we have a date
    if (empty($exifDate) || !\is_string($exifDate)) {
        throw new \Exception('No date found in exif');
    }

    // Get timezone from exif
    $exifTz = null;
    $tzName = $exif['OffsetTimeOriginal'] ?? $exif['OffsetTime'] ?? $exif['LocationTZID'] ?? null;
    if (\is_string($tzName) && '' !== $tzName) {
        try {
            $exifTz = new \DateTimeZone($tzName);
        } catch (\Throwable $e) {
            // An unknown timezone is reported as an \Exception (not an
            // \Error), so catch everything and fall back to UTC below.
            $exifTz = null;
        }
    }

    // Force UTC if no timezone found
    $parseTz = $exifTz ?? new \DateTimeZone('UTC');

    // https://github.com/pulsejet/memories/pull/397
    // https://github.com/pulsejet/memories/issues/485
    $formats = [
        'Y:m:d H:i', // 2023:03:05 18:58
        'Y:m:d H:iO', // 2023:03:05 18:58+05:00
        'Y:m:d H:i:s', // 2023:03:05 18:58:17
        'Y:m:d H:i:sO', // 2023:03:05 10:58:17+05:00
        'Y:m:d H:i:s.u', // 2023:03:05 10:58:17.000
        'Y:m:d H:i:s.uO', // 2023:03:05 10:58:17.000Z
    ];

    /** @var false|\DateTime $parsedDate */
    $parsedDate = false;

    // The first format that matches wins
    foreach ($formats as $format) {
        $parsedDate = \DateTime::createFromFormat($format, $exifDate, $parseTz);
        if ($parsedDate) {
            break;
        }
    }

    // If we couldn't parse the date, throw an error
    if (!$parsedDate) {
        throw new \Exception("Invalid date: {$exifDate}");
    }

    // Epoch timestamp
    $timestamp = $parsedDate->getTimestamp();

    // Filter out dates before 1800 A.D.
    if ($timestamp < -5364662400) { // 1800 A.D.
        throw new \Exception("Date too old: {$exifDate}");
    }

    // Filter out January 1, 1904 12:00:00 AM UTC
    // Exiftool returns this as the date when QuickTimeUTC is set and
    // the date is set to 0000:00:00 00:00:00
    if (-2082844800 === $timestamp) {
        throw new \Exception("Blacklisted date: {$exifDate}");
    }

    // Force the timezone to be the same as parseTz
    if ($exifTz) {
        $parsedDate->setTimezone($exifTz);
    }

    return $parsedDate;
}
2022-08-20 02:53:21 +00:00
/**
 * Get the date taken from either the file or exif data if available.
 *
 * The exif date is used when it can be parsed. Otherwise the modification
 * time of the file is returned, expressed in the system timezone (the TZ
 * environment variable if set, else PHP's default timezone).
 *
 * @throws \Error if the system timezone is not a valid timezone
 */
public static function getDateTaken(File $file, array $exif): \DateTime
{
    try {
        return self::parseExifDate($exif);
    } catch (\Exception|\ValueError $ex) {
        // No usable exif date, continue with the fallback below
    }

    // Fall back to modification time
    $dt = new \DateTime('@'.$file->getMtime());

    // Set timezone to system timezone
    $tz = getenv('TZ') ?: date_default_timezone_get();

    try {
        $systemZone = new \DateTimeZone($tz);
        $dt->setTimezone($systemZone);
    } catch (\Exception $e) {
        throw new \Error("FATAL: system timezone is invalid (TZ): {$tz}");
    }

    return $dt;
}
/**
 * Convert time to local date in UTC.
 *
 * The wall-clock reading of the given date (to the second) is kept as-is
 * and re-labelled as UTC; no timezone conversion takes place.
 */
public static function forgetTimezone(\DateTime $date): \DateTime
{
    $wallClock = $date->format('Y-m-d H:i:s');
    $utc = new \DateTimeZone('UTC');

    return new \DateTime($wallClock, $utc);
}
2022-09-25 13:21:40 +00:00
2022-10-15 19:15:07 +00:00
/**
 * Get image dimensions from Exif data.
 *
 * Width and height are swapped when Orientation (5-8) or Rotation (90 or
 * 270) indicates a quarter turn. Dimensions that are missing, not positive
 * or larger than 100000 are reported as [0, 0].
 *
 * @return array [width, height]
 */
public static function getDimensions(array $exif)
{
    $width = $exif['ImageWidth'] ?? 0;
    $height = $exif['ImageHeight'] ?? 0;

    // Validate before anything else, so that rotated media cannot slip
    // through with out-of-range values.
    if ($width <= 0 || $height <= 0 || $width > 100000 || $height > 100000) {
        return [0, 0];
    }

    // Check if image is rotated and we need to swap width and height
    $rotation = $exif['Rotation'] ?? 0;
    $orientation = $exif['Orientation'] ?? 0;

    if (\in_array($orientation, [5, 6, 7, 8], true) || \in_array($rotation, [90, 270], true)) {
        return [$height, $width];
    }

    return [$width, $height];
}
/**
 * Get the Approximate Unique ID (AUID) from parameters.
 *
 * The ID is the CRC32 of the decimal epoch immediately followed by the
 * decimal size.
 *
 * @param int $epoch the date taken as a unix timestamp (seconds)
 * @param int $size  the file size in bytes
 */
public static function getAUID(int $epoch, int $size): int
{
    $key = "{$epoch}{$size}";

    return crc32($key);
}
/**
 * Get the list of MIME Types that are allowed to be edited.
 *
 * These are the image and video MIME types of the application minus the
 * ones listed in FORBIDDEN_EDIT_MIMES.
 */
public static function allowedEditMimetypes(): array
{
    $candidates = array_merge(Application::IMAGE_MIMES, Application::VIDEO_MIMES);

    return array_diff($candidates, self::FORBIDDEN_EDIT_MIMES);
}
2022-09-25 13:21:40 +00:00
/**
 * Set exif data using raw json.
 *
 * The data is handed to exiftool as JSON on its standard input and the
 * file is overwritten in place.
 *
 * @param string $path to local file
 * @param array  $data exif data
 *
 * @throws \Exception on failure
 */
public static function setExif(string $path, array $data)
{
    $data['SourceFile'] = $path;

    // Throw (JsonException is an \Exception) rather than passing false on
    $raw = json_encode([$data], JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR);

    $cmd = array_merge(self::getExiftool(), [
        '-overwrite_original',
        '-api', 'LargeFileSupport=1',
        '-json=-', $path,
    ]);

    $pipes = [];
    $proc = proc_open($cmd, [
        0 => ['pipe', 'r'],
        1 => ['pipe', 'w'],
        2 => ['pipe', 'w'],
    ], $pipes);

    if (!\is_resource($proc)) {
        throw new \Exception('Could not set exif data: failed to start exiftool');
    }

    try {
        fwrite($pipes[0], $raw);
        fclose($pipes[0]); // closing stdin marks the end of the JSON input

        $stdout = self::readOrTimeout($pipes[1], self::EXIFTOOL_TIMEOUT);
    } finally {
        // Release everything even when writing or reading failed
        foreach ($pipes as $pipe) {
            if (\is_resource($pipe)) {
                fclose($pipe);
            }
        }
        proc_terminate($proc);
        proc_close($proc);
    }

    if (str_contains($stdout, 'error')) {
        error_log("Exiftool error: {$stdout}");

        throw new \Exception('Could not set exif data: '.$stdout);
    }
}
2022-10-19 17:10:36 +00:00
/**
 * Set exif data on a Nextcloud file.
 *
 * The data is written to the local copy of the file. For storages that are
 * not local the modified copy is then written back through the file API.
 *
 * @param File  $file file to modify
 * @param array $data exif data
 *
 * @throws \Exception if no local path is available or writing fails
 */
public static function setFileExif(File $file, array $data)
{
    // Get path to local file so we can skip reading
    $path = $file->getStorage()->getLocalFile($file->getInternalPath());
    if (!\is_string($path)) {
        // Same guard as in getExifFromFile(): the storage may not provide a path
        throw new \Exception('Failed to get local file path');
    }

    // Set exif data
    self::setExif($path, $data);

    // Update remote file if not local
    if (!$file->getStorage()->isLocal()) {
        $file->putContent(fopen($path, 'r')); // closes the handler
    }

    // Touch the file, triggering a reprocess through the hook
    $file->touch();
}
/**
 * Read the standard output of exiftool run with `$prop -n -b` on a file.
 *
 * @param string $path local file path
 * @param string $prop argument passed to exiftool ahead of `-n -b`
 *                     (presumably a tag selector; verify against callers)
 *
 * @return string everything exiftool wrote to its standard output
 *
 * @throws \Exception if exiftool cannot be started or read in time
 */
public static function getBinaryExifProp(string $path, string $prop)
{
    $pipes = [];
    $proc = proc_open(array_merge(self::getExiftool(), [$prop, '-n', '-b', $path]), [
        1 => ['pipe', 'w'],
        2 => ['pipe', 'w'],
    ], $pipes);

    // proc_open() yields false on failure and leaves no pipes to read
    if (!\is_resource($proc)) {
        throw new \Exception('Could not read from Exiftool');
    }

    stream_set_blocking($pipes[1], false);

    try {
        return self::readOrTimeout($pipes[1], self::EXIFTOOL_TIMEOUT);
    } catch (\Exception $ex) {
        error_log("Exiftool timeout: [{$path}]");

        throw new \Exception('Could not read from Exiftool', 0, $ex);
    } finally {
        fclose($pipes[1]);
        fclose($pipes[2]);
        proc_terminate($proc);
        proc_close($proc);
    }
}
/**
 * Get exif data from a local file path using a separate exiftool process
 * that is additionally given the `-G4` argument.
 */
public static function getExifWithDuplicates(string $path)
{
    $extraArgs = ['-G4'];

    return self::getExifFromLocalPathWithSeparateProc($path, $extraArgs);
}
/**
 * Get the exiftool command as provided by BinExt.
 *
 * @return array command parts to be placed in front of the exiftool arguments
 */
private static function getExiftool(): array
{
    $command = BinExt::getExiftool();

    return $command;
}
2022-10-19 17:10:36 +00:00
/**
 * Initialize static exiftool process for local reads.
 *
 * Any previously opened process is closed first. When the process cannot
 * be started, the handle and the pipes are left as null.
 */
private static function initializeStaticExiftoolProc()
{
    self::closeStaticExiftoolProc();

    $cmd = array_merge(self::getExiftool(), ['-stay_open', 'true', '-@', '-']);

    $pipes = [];
    $proc = proc_open($cmd, [
        0 => ['pipe', 'r'],
        1 => ['pipe', 'w'],
        2 => ['pipe', 'w'],
    ], $pipes);

    // proc_open() yields false on failure; there are no pipes to configure then
    if (!\is_resource($proc)) {
        self::$staticProc = null;
        self::$staticPipes = null;

        return;
    }

    self::$staticProc = $proc;
    self::$staticPipes = $pipes;

    // Output is polled by readOrTimeout(), so reads must not block
    stream_set_blocking(self::$staticPipes[1], false);
}
/**
 * Read from non blocking handle or throw timeout.
 *
 * Reading continues until the buffer ends with the delimiter or, without a
 * delimiter, until end of file. Each read that yields no data counts as
 * one unit towards the timeout and is followed by a pause of one
 * millisecond.
 *
 * @param resource $handle
 * @param int      $timeout   milliseconds
 * @param string   $delimiter null for eof
 *
 * @return string everything that was read, including the delimiter
 *
 * @throws \Exception when the timeout is reached
 */
private static function readOrTimeout($handle, int $timeout, ?string $delimiter = null)
{
    $untilDelimiter = null !== $delimiter && '' !== $delimiter;

    $buf = '';
    $waitedMs = 0;

    while ($waitedMs < $timeout) {
        $finished = $untilDelimiter ? str_ends_with($buf, $delimiter) : feof($handle);
        if ($finished) {
            break;
        }

        $r = stream_get_contents($handle);

        // Only a failed or empty read means "no data". Do not use empty()
        // here: a chunk consisting of "0" is real data.
        if (false === $r || '' === $r) {
            ++$waitedMs;
            usleep(1000);

            continue;
        }

        $buf .= $r;
    }

    if ($waitedMs >= $timeout) {
        throw new \Exception('Timeout');
    }

    return $buf;
}
/**
 * Get exif data for a local file through the shared stay_open process.
 *
 * The path and the read arguments are written to the standard input of the
 * process, one per line, followed by `-execute`. The output is read up to
 * the `{ready}` marker. On any failure the process is restarted.
 *
 * @param string $path local file path
 *
 * @throws \Exception if nothing usable could be read
 */
private static function getExifFromLocalPathWithStaticProc(string $path)
{
    // Arguments are separated by line breaks on this channel, so a path
    // containing one would be split into several arguments. Such paths go
    // through a separate process, which receives the path as a single argument.
    if (false !== strpbrk($path, "\r\n")) {
        return self::getExifFromLocalPathWithSeparateProc($path);
    }

    $args = implode("\n", self::EXIFTOOL_ARGS);
    $readyToken = "\n{ready}\n";

    try {
        if (false === fwrite(self::$staticPipes[0], "{$path}\n{$args}\n-execute\n")) {
            throw new \Exception('Failed to write to Exiftool');
        }
        fflush(self::$staticPipes[0]);

        $buf = self::readOrTimeout(self::$staticPipes[1], self::EXIFTOOL_TIMEOUT, $readyToken);

        // Cut off the marker, leaving only the JSON output
        $tokPos = strrpos($buf, $readyToken);
        $buf = substr($buf, 0, $tokPos);

        return self::processStdout($buf);
    } catch (\Exception $ex) {
        error_log("ERROR: Exiftool may have crashed, restarting process [{$path}]");
        self::restartStaticExiftoolProc();

        throw new \Exception('Nothing to read from Exiftool', 0, $ex);
    }
}
/**
 * Get exif data for a local file by running a separate exiftool process.
 *
 * @param string $path      local file path
 * @param array  $extraArgs arguments placed after the common read arguments
 *
 * @throws \Exception if exiftool cannot be started, read in time or parsed
 */
private static function getExifFromLocalPathWithSeparateProc(string $path, array $extraArgs = [])
{
    $pipes = [];
    $proc = proc_open(array_merge(self::getExiftool(), self::EXIFTOOL_ARGS, $extraArgs, [$path]), [
        1 => ['pipe', 'w'],
        2 => ['pipe', 'w'],
    ], $pipes);

    // proc_open() yields false on failure and leaves no pipes to read
    if (!\is_resource($proc)) {
        throw new \Exception('Could not read from Exiftool');
    }

    stream_set_blocking($pipes[1], false);

    try {
        $stdout = self::readOrTimeout($pipes[1], self::EXIFTOOL_TIMEOUT);

        return self::processStdout($stdout);
    } catch (\Exception $ex) {
        error_log("Exiftool timeout: [{$path}]");

        throw new \Exception('Could not read from Exiftool', 0, $ex);
    } finally {
        fclose($pipes[1]);
        fclose($pipes[2]);
        proc_terminate($proc);
        proc_close($proc);
    }
}
/**
 * Get json array from stdout of exiftool.
 *
 * The output is expected to be a JSON list whose first element holds the
 * data of the file.
 *
 * @return array the first element of the decoded list
 *
 * @throws \Exception if the output is not such a list
 */
private static function processStdout(string $stdout)
{
    $json = json_decode($stdout, true);

    // Scalars, objects without a first element and non-array entries are
    // all rejected here instead of failing on the index access below.
    if (!\is_array($json) || !isset($json[0]) || !\is_array($json[0])) {
        throw new \Exception('Could not read exif data');
    }

    return $json[0];
}
}