Commit d5901897 authored by Andrey Filippov

correctly processing split files, modified filenames for sorting

parent b46a9a1d
......@@ -21,12 +21,22 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
Found problem - does not process files that start in one chunk and end in another?
Found problem - does not process files that start in one file and end in another?
*/
define('START_TIFF', hex2bin('4d4d002a')); // start of a TIFF, JP/JP4 also have this TIFF marker
define('START_JP', hex2bin('ffd8ffe1')); // start of JPEG/JP4, TIFF usually does not have JPEG/JP4 markers
define('START_EXIF', hex2bin("457869660000")); // Exif\0\0
define('EXIF_OFFSET', 6); // Exif start offset from the beginning of the file (from start of START_JP)
define('END_JP', hex2bin('ffd9')); // end of JPEG/JP4 file
define('MAX_HEAD_LEN', 0x300); // maximal header length that includes complete header
//disable the default time limit for php scripts.
define('MIN_JP_LEN', 0x500); // skip from jpeg looking for end marker
define('MAX_IMG_LEN', 0x1000000); // maximal image size (16M) to look for the end marker
define('REQUIRE_EXIF', 1); // JPEG/JP4 must have Exif (more filtering, less false positives)
//define('DEBUG_LEVEL', 3); // Generate debug output
//define('DEBUG_OUT_FILES',DEBUG_LEVEL? 1 : 0 ); // 1 to add sequence number to result image names (make all unique)
define('DEBUG_LEVEL', 1); // Generate debug output
define('DEBUG_OUT_FILES',DEBUG_LEVEL? 0 : 0 ); // 1 to add sequence number to result image names (make all unique)
//disable the default time limit for php scripts.
set_time_limit(0);
$chunksize=10000000; //10MB
......@@ -50,26 +60,26 @@ function print_help(){
global $argv;
echo <<<"TXT"
Help:
* Usage:
~$ {$argv[0]} path=[path-to-dir] dest_path=[dest-subdir] move_processed=[move-processed-files] ext=[forced-ext] chn_offs=[add-to-chn]
where:
* path-to-dir - string - scan this path + 1 dir down
* dest-subdir - string - save results to "path-to-dir/dest-subdir/"
* move-processed-files - 0(default) or 1 - if not 1 - will not move the processed files
* forced-ext - string - override extensions from exifs with this one
* add-to-chn - integer - add to decoded channel number
Help:
* Usage:
~$ {$argv[0]} path=[path-to-dir] dest_path=[dest-subdir] move_processed=[move-processed-files] ext=[forced-ext] chn_offs=[add-to-chn]
where:
* path-to-dir - string - scan this path + 1 dir down
* dest-subdir - string - save results to "path-to-dir/dest-subdir/"
* move-processed-files - 0(default) or 1 - if not 1 - will not move the processed files
* forced-ext - string - override extensions from exifs with this one
* add-to-chn - integer - add to decoded channel number
* Examples:
** Split all *.img, *.bin and *.mov files in the current dir and 1 dir down, puts results to '0/':
~$ {$argv[0]}
** Split in /data/test + 1 dir down, create and move processed files to /data/test/processed for files in path and /data/test/any-found-subdir/processed for any files found in /data/test/any-found-subdir
~$ {$argv[0]} path=/data/test move_processed=1
** Split all *.img, *.bin and *.mov files in the current dir and 1 dir down, puts results to 'results/', override extensions with 'jpg':
~$ {$argv[0]} dest_path=results ext=jpg
* Examples:
** Split all *.img, *.bin and *.mov files in the current dir and 1 dir down, puts results to '0/':
~$ {$argv[0]}
** Split in /data/test + 1 dir down, create and move processed files to /data/test/processed for files in path and /data/test/any-found-subdir/processed for any files found in /data/test/any-found-subdir
~$ {$argv[0]} path=/data/test move_processed=1
** Split all *.img, *.bin and *.mov files in the current dir and 1 dir down, puts results to 'results/', override extensions with 'jpg':
~$ {$argv[0]} dest_path=results ext=jpg
TXT;
TXT;
}
......@@ -115,25 +125,48 @@ if (isset($_GET['chn_offs'])){
}
$list = preg_grep('/^([^.])/', scandir($path));
sort($list); // also re-indexes
//$list=array();
if (!is_dir("$path/$destination")) mkdir("$path/$destination",0777);
foreach ($list as $item) {
if (is_dir("$path/$item")){
if ($item==$processed_subdir) continue;
$sublist = preg_grep('/^([^.])/', scandir("$path/$item"));
foreach($sublist as $subitem){
if (split_file("$path/$item","$subitem","../$destination",$add_to_chn)==0){
if ($move_processed){
if (!is_dir("$path/$item/$processed_subdir")){
mkdir("$path/$item/$processed_subdir",0777);
}
rename("$path/$item/$subitem","$path/$item/$processed_subdir/$subitem");
if (!is_dir("$path/$destination")) mkdir("$path/$destination",0777);
if ((DEBUG_LEVEL >= 2)) {
var_dump($list);
}
//exit(0);
/*
array(4) {
[2]=>
string(10) "file_0.img"
[3]=>
string(10) "file_1.img"
[4]=>
string(10) "file_2.img"
[5]=>
string(7) "results"
}
*/
// should not mix files and directories - should be same level
foreach ($list as $index=>$item) {
if (is_dir("$path/$item")) {
if ($item == $processed_subdir)
continue;
$sublist = preg_grep('/^([^.])/', scandir("$path/$item"));
sort($sublist);
foreach ($sublist as $file_index => $subitem) {
$next_file = ($file_index < (count($sublist) -1)) ? $sublist[$file_index + 1] : "";
if (split_file("$path/$item", "$subitem", "../$destination", $add_to_chn, $next_file) == 0) {
if ($move_processed) {
if (! is_dir("$path/$item/$processed_subdir")) {
mkdir("$path/$item/$processed_subdir", 0777);
}
rename("$path/$item/$subitem", "$path/$item/$processed_subdir/$subitem");
}
}
}
}
}
}else{
if (split_file("$path","$item","$destination",$add_to_chn)==0){
$next_file = ($index < (count($list)-1)) ? $list[$index + 1] : "";
if (split_file("$path","$item","$destination",$add_to_chn, $next_file)==0){
if ($move_processed){
if (!is_dir("$path/$processed_subdir")){
mkdir("$path/$processed_subdir",0777);
......@@ -145,90 +178,174 @@ foreach ($list as $item) {
}
//./extract_images_tiff.php path=/home/eyesis/captures/tests/jp4/ dest_path=results chn_offs=16
function split_file($path,$file,$destination,$add_to_chn=-1){
// global $startMarkerWithExif = START_JP; // START_TIFF
global $chunksize;
global $input_exts;
global $forced_ext;
if (in_array(get_ext("$path/$file"),$input_exts)) {
echo date(DATE_RFC2822).": Splitting $path/$file, results dir: $path/$destination\n";
//split_mov("$path",$file,$destination,$extension,$startMarkerWithExif,$chunksize);
$file_type = 0; // JP4/JPEG
$markers=array();
$offset =0;
$f=fopen("$path/$file","r");
$s = fread($f,$chunksize);
$pos_jp = strpos($s,START_JP);
$pos_tiff = strpos($s,START_TIFF);
if (($pos_jp === false) && ($pos_tiff === false)) {
print ("None of TIFF (".bin2hex(START_TIFF).") or JP4/JPEG (".bin2hex(START_JP).") markers found in the first ".$chunksize." bytes of the file $path/$file\n");
return -1;
}
if ($pos_jp === false) {
$file_type = 1; // tiff
} else if (($pos_tiff !== false) && ($pos_tiff < $pos_jp)){ // reducing probability of stray START_JP
$file_type = 1; // tiff
} else {
$file_type = 0; // jpeg/jp4
}
$startMarkerWithExif = $file_type? START_TIFF: START_JP; // START_TIFF
echo "Detected image type ".($file_type?'TIFF':'JPEG/JP4').".\n";
$forced_ext = $file_type?'tiff':''; // was ".tiff":""
fclose($f); // not sure if feof is cleared by fseek, so closing and reopening the file
$f=fopen("$path/$file","r");
//first scan
while (!feof($f)) {
$pos=0;
$index=0;
fseek($f,$offset);
$s = fread($f,$chunksize);
while(true){
$pos=strpos($s,$startMarkerWithExif,$pos);
if ($pos === false) break;
// echo count($markers).': '.strval(($offset+$pos)% 4096) .' ('.strval($offset+$pos).')'; // debugging
$markers[count($markers)]=$offset+$pos;
$pos++;
}
$offset+=(strlen($s)-strlen($startMarkerWithExif)+1); // so each marker will appear once
function split_file($path, $file, $destination, $add_to_chn = - 1, $next_file = "")
{
if ($next_file && !is_file("$path/$next_file")){
if ((DEBUG_LEVEL >= 1)) {
printf("Ignoring $next_file - it is a DIRECTORY\n");
}
$next_file="";
}
$markers[count($markers)]=$offset+strlen($s); // full length of the file
echo " images found: ".(count($markers)-1)."\n";
//second scan
for ($i=0;$i<(count($markers)-1);$i++) {
fseek($f,$markers[$i]);
$s = fread($f,$markers[$i+1]-$markers[$i]);
// global $startMarkerWithExif = START_JP; // START_TIFF
global $chunksize;
global $input_exts;
global $forced_ext;
$tmp_name = "$path/$destination/header.tmp"; // image.tmp";
// file_put_contents($tmp_name,$s);
file_put_contents($tmp_name,substr($s, 0, MAX_HEAD_LEN)); // only save beginning of the file
// printf ("header length = %d\n", strlen(substr($s, 0, MAX_HEAD_LEN)));
$image_length=-1;
$result_name = elphel_specific_result_name($tmp_name, $image_length, $add_to_chn);
if ($image_length < 0){
printf ("Wrong image length from Exif header: %d", $image_length);
exit(1);
}
$dest_image = "$path/$destination/$result_name"; // $result_name may now include "/"
$dest_set_dir = dirname($dest_image);
if (!is_dir($dest_set_dir)){
mkdir($dest_set_dir,0777);
}
// rename($tmp_name, $dest_image); // "$path/$destination/$result_name");
file_put_contents($dest_image,substr($s, 0, $image_length));
if (in_array(get_ext("$path/$file"), $input_exts)) {
$next_path = $next_file ? "$path/$next_file" : "";
echo date(DATE_RFC2822) . ": Splitting $path/$file (next is $next_path):, results dir: $path/$destination\n";
// split_mov("$path",$file,$destination,$extension,$startMarkerWithExif,$chunksize);
$file_type = 0; // JP4/JPEG
$markers = array();
$offset = 0;
$f = fopen("$path/$file", "r");
$s = fread($f, $chunksize);
$pos_jp = strpos($s, START_JP);
$pos_tiff = strpos($s, START_TIFF);
if (($pos_jp === false) && ($pos_tiff === false)) {
if (strlen($s) == $chunksize) {
print("None of TIFF (" . bin2hex(START_TIFF) . ") or JP4/JPEG (" . bin2hex(START_JP) . ") markers found in the first " . $chunksize . " bytes of the file $path/$file\n");
} else {
print("None of TIFF (" . bin2hex(START_TIFF) . ") or JP4/JPEG (" . bin2hex(START_JP) . ") markers found in the remaining " . strlen($s) . " bytes of the file $path/$file\n");
}
return - 1;
}
if ($pos_jp === false) {
$file_type = 1; // tiff
} else if (($pos_tiff !== false) && ($pos_tiff < $pos_jp)) { // reducing probability of stray START_JP
$file_type = 1; // tiff
} else {
$file_type = 0; // jpeg/jp4
}
$startMarkerWithExif = $file_type ? START_TIFF : START_JP; // START_TIFF
echo "Detected image type " . ($file_type ? 'TIFF' : 'JPEG/JP4') . ".\n";
$forced_ext = $file_type ? 'tiff' : ''; // was ".tiff":""
fclose($f); // not sure if feof is cleared by fseek, so closing and reopening the file
$f = fopen("$path/$file", "r");
// first scan
while (! feof($f)) {
$pos = 0;
$index = 0;
fseek($f, $offset); // will read from the last
$s = fread($f, $chunksize);
if ($next_file && (strlen($s) < $chunksize)) { // special case, try to add beginning of the next file
printf("Looking for the marker in the file split, using %s", $next_file);
$f1 = fopen("$path/$next_file", "r");
$s1 = fread($f1, strlen($startMarkerWithExif) - 1);
fclose($f1);
$s .= $s1;
}
while (true) {
$pos = strpos($s, $startMarkerWithExif, $pos);
if ($pos === false) {
break;
}
if (REQUIRE_EXIF && ($file_type == 0) && (strpos($s, START_EXIF, $pos) != ($pos + EXIF_OFFSET))) { // JPEG/JP4
printf("Could not find required by REQUIRE_EXIF==%d Exif signature at offset %d. Skipping this marker (false positive?)\n", REQUIRE_EXIF, START_EXIF);
} else {
$markers[count($markers)] = $offset + $pos;
}
$pos ++;
}
// steps back by needle length-1, so partial markers will not be lost
$offset += (strlen($s) - strlen($startMarkerWithExif) + 1); // so each marker will appear once
}
$markers[count($markers)] = $offset + strlen($s); // full length of the file
$tmp_name = $file_type? "$path/$destination/header.tmp": "$path/$destination/image.tmp";
echo " images found: " . (count($markers) - 1) . "\n";
// second scan
$dbg_last = 0;
for ($i = 0; $i < (count($markers) - 1); $i ++) {
if ((DEBUG_LEVEL >= 2)) {
if ($i == 0) {
printf("1.First image[%d], markers[%d]=0x%08x, markers[%d]=0x%08x, diff=0x%08x\n", $i, $i, $markers[$i], $i + 1, $markers[$i + 1], $markers[$i + 1] - $markers[$i]);
} else if ($i == (count($markers) - 2)) {
printf("1.Last image[%d], markers[%d]=0x%08x, markers[%d]=0x%08x, diff=0x%08x\n", $i, $i, $markers[$i], $i + 1, $markers[$i + 1], $markers[$i + 1] - $markers[$i]);
}
}
fseek($f, $markers[$i]);
$s = fread($f, $markers[$i + 1] - $markers[$i]);
if ((DEBUG_LEVEL >= 2)) {
if (($i == (count($markers) - 2)) || ($i == 0)) {
printf("Read 0x%08x (wanted 0x%08x) bytes\n", strlen($s), $markers[$i + 1] - $markers[$i]);
}
}
$image_length = - 1;
if ($file_type == 0) { // JPEG/JP4
// find end marker
$end_pos = false;
if (strlen($s) >= MIN_JP_LEN) {
$end_pos = strpos($s, END_JP, MIN_JP_LEN); // PHP Warning: strpos(): Offset not contained in string in /home/elphel/git/elphel-tools-x393/extract_images_tiff.php on line 254
}
if (!$end_pos && $next_path) { // end marker not found: try reading up to MAX_IMG_LEN (16M) bytes from the beginning of the next file
$f1 = fopen($next_path, "r");
$s .= fread($f1, MAX_IMG_LEN); // beginning of the next file
fclose($f1);
$end_pos = strpos($s, END_JP, MIN_JP_LEN); // PHP Warning: strpos(): Offset not contained in string in /home/elphel/git/elphel-tools-x393/extract_images_tiff.php on line 254
if ($end_pos && (DEBUG_LEVEL >= 1)) {
printf ("Found image continued in the next file %s\n", $next_path);
}
}
if ($end_pos) {
$end_pos+=strlen(END_JP);
file_put_contents($tmp_name, substr($s, 0, $end_pos));
$image_length = 0; // will not try to get it
$result_name = elphel_specific_result_name($tmp_name, $image_length, $add_to_chn);
if (DEBUG_OUT_FILES) {
$result_name = substr($result_name,0,strlen($result_name)-4).'-'.strval($i).'.jp4';
}
if ((DEBUG_LEVEL >= 3)) {
printf("%05d: 0x%08x l=0x%06x (0x%06x) %s\n",$i, $markers[$i], $end_pos, $markers[$i + 1] - $markers[$i], "$path/$destination/$result_name");
}
$dest_image = "$path/$destination/$result_name"; // $result_name may now include "/"
$dest_set_dir = dirname($dest_image);
if (! is_dir($dest_set_dir)) {
mkdir($dest_set_dir, 0777);
}
rename($tmp_name, $dest_image); // "$path/$destination/$result_name");
}
} else if ($file_type == 1) { // Tiff
// file_put_contents($tmp_name,$s);
file_put_contents($tmp_name, substr($s, 0, MAX_HEAD_LEN)); // only save beginning of the file
// printf ("header length = %d\n", strlen(substr($s, 0, MAX_HEAD_LEN)));
$result_name = elphel_specific_result_name($tmp_name, $image_length, $add_to_chn);
if ($image_length < 0) {
printf("Wrong image length from Exif header: %d", $image_length);
exit(1);
}
$dest_image = "$path/$destination/$result_name"; // $result_name may now include "/"
$dest_set_dir = dirname($dest_image);
if (! is_dir($dest_set_dir)) {
mkdir($dest_set_dir, 0777);
}
// rename($tmp_name, $dest_image); // "$path/$destination/$result_name");
file_put_contents($dest_image, substr($s, 0, $image_length));
if ((DEBUG_LEVEL >= 2)) {
if ($i == 0) {
file_put_contents($dest_image . ".test", substr($s, 0, $image_length));
printf("2.First image[%d] from img file - %s, length=0x%08x(%d), strlen(s)=%d (%d)\n", $i, $dest_image, $image_length, $image_length, strlen($s), strlen(substr($s, 0, $image_length)));
} else if ($i == (count($markers) - 2)) {
file_put_contents($dest_image . ".test", substr($s, 0, $image_length));
printf("2.Last image[%d] from img file - %s, length=0x%08x(%d), strlen(s)=%d (%d)\n", $i, $dest_image, $image_length, $image_length, strlen($s), strlen(substr($s, 0, $image_length)));
}
}
}
}
if (file_exists($tmp_name)){
unlink ($tmp_name);
}
return 0;
} else {
return - 1;
}
return 0;
}else{
return -1;
}
}
......@@ -242,7 +359,9 @@ function elphel_specific_result_name($file, &$image_length, $add_to_chn=-10){
$exif = exif_read_data($file); // gets false find, what is wrong
// image size = ([StripOffsets] => 557)+ ([StripByteCounts] => 656640)
$image_length = intval($exif['StripOffsets']) + intval($exif['StripByteCounts']);
if ($image_length != 0) {
$image_length = intval($exif['StripOffsets']) + intval($exif['StripByteCounts']);
}
$ext = elphel_specific_result_ext($exif,$forced_ext);
//converting GMT a local time GMT+7
......
......@@ -147,7 +147,7 @@ for i in range(len(cams)):
if args.n==0:
data_size_blocks = pc.read_camogm_disk_file_blocks("/dev/"+p[1][0:-1]+"1", args.file_end)
data_skip_blocks = pc.read_camogm_disk_file_blocks("/dev/"+p[1][0:-1]+"1", args.file_start)
data_size_blocks -= data_skip_blocks
data_size_blocks -= data_skip_blocks # before it included skipped !
else:
data_size_blocks = args.n
data_skip_blocks = args.skip
......
......@@ -260,7 +260,8 @@ class PC():
if not os.path.isdir(dirname):
os.mkdir(dirname)
for i in range(0,dl_n+dl_skip):
fname = dirname+"/"+"file_"+str(i)+".img"
# fname = dirname+"/"+"file_"+str(i)+".img"
fname = "%s/file_%03d.img" %(dirname, i) #dirname+"/"+"file_"+str(i)+".img"
skip = i*(dl_bc-1)
if i>=dl_skip:
shout("sudo dd if="+part+" "+" of="+fname+" bs="+str(dl_bs)+"M count="+str(dl_bc)+" skip="+str(skip))
......@@ -280,12 +281,12 @@ class PC():
os.mkdir(dirname)
num_file = 0
# optional first file to align skip to chunk_blocks, 1 block at a time
if (blocks_skip > 0) and ((blocks_skip // chunk_blocks) > 0):
bwrite = chunk_blocks - (blocks_skip // chunk_blocks)
if (blocks_skip > 0) and ((blocks_skip % chunk_blocks) > 0):
bwrite = chunk_blocks - (blocks_skip % chunk_blocks)
if (bwrite > blocks_load):
bwrite = blocks_load
fname = dirname+"/"+"file_"+str(num_file)+".img"
print("Aligning skip to chunks, writing %d %d-byte blocks (skipping %d blocks) to %s"%(bwrite, block_size, blocks_skip, fname))
print("Aligning skip to chunks, downloading %d %d-byte blocks (skipping %d blocks) to %s"%(bwrite, block_size, blocks_skip, fname))
shout("sudo dd if="+part+" "+" of="+fname+" bs="+str(block_size)+" count="+str(bwrite)+" skip="+str(blocks_skip))
blocks_skip += bwrite
blocks_load -= bwrite
......@@ -297,7 +298,7 @@ class PC():
if (chunks_write > file_chunks):
chunks_write = file_chunks
fname = dirname+"/"+"file_"+str(num_file)+".img"
print("Writing %d %d-byte chunks, skipping %d chunks to %s"%(chunks_write, chunk_bytes, chunks_skip, fname))
print("Downloading %d %d-byte chunks, skipping %d chunks to %s"%(chunks_write, chunk_bytes, chunks_skip, fname))
shout("sudo dd if="+part+" "+" of="+fname+" bs="+str(chunk_bytes)+" count="+str(chunks_write)+" skip="+str(chunks_skip))
bwrite = chunks_write * chunk_blocks
blocks_skip += bwrite
......@@ -306,7 +307,7 @@ class PC():
# optionally write the remainder (< chunk), 1 block at a time
if (blocks_load > 0):
fname = dirname+"/"+"file_"+str(num_file)+".img"
print("Writing last %d %d-byte blocks, skipping %d blocks to %s"%(blocks_load, block_size, blocks_skip, fname))
print("Downloading last %d %d-byte blocks, skipping %d blocks to %s"%(blocks_load, block_size, blocks_skip, fname))
shout("sudo dd if="+part+" "+" of="+fname+" bs="+str(block_size)+" count="+str(blocks_load)+" skip="+str(blocks_skip))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment