WIP: write audio samples to file

Audio samples are recorded after each frame, which breaks the structure of video chunks when frames_per_chunk > 1. This will be fixed later.

WIP: write audio samples to file
Audio samples are recorded after each frame, which breaks the structure of video chunks when frames_per_chunk > 1. This will be fixed later.
ecb78ce2 · Mikhail Karpenko · c5b381e3 · ecb78ce2 · ecb78ce2 · ecb78ce2
Commit ecb78ce2 authored May 17, 2017 by Mikhail Karpenko
Hide whitespace changes
Inline Side-by-side

Showing with 82 additions and 50 deletions

camogm_audio.c src/camogm_audio.c +16 -14

camogm_audio.h src/camogm_audio.h +4 -3

camogm_mov.c src/camogm_mov.c +56 -24

qt_audio src/qt_audio +6 -9

No files found.
--- a/src/camogm_audio.c
+++ b/src/camogm_audio.c
@@ -53,8 +53,8 @@ void audio_init(struct audio *audio, bool restart)
 		int err = 0;
 		bool init_ok = false;
 		unsigned int t = audio->audio_rate;
-		unsigned int period_time = 40 * 1000;
+		unsigned int period_time = SAMPLE_TIME * 1000;
-		unsigned int buffer_time = 2000000;
+		unsigned int buffer_time = BUFFER_TIME * 1000;
 		snd_pcm_hw_params_t *hw_params;                         // allocated on stack, do not free
 		snd_pcm_sw_params_t *sw_params;                         // allocated on stack, do not free
 		snd_pcm_status_t *status;                               // allocated on stack, do not free
@@ -65,7 +65,7 @@ void audio_init(struct audio *audio, bool restart)
 		audio->ctx_a.sbuffer_len -= audio->ctx_a.sbuffer_len % 2;
 		// 'while' loop here just to break initialization sequence after an error
 		while (true) {
-			audio->ctx_a.sbuffer = (void *)malloc(audio->ctx_a.sbuffer_len * 8 + AUDIO_SBUFFER_PREFIX);
+			audio->ctx_a.sbuffer = (void *)malloc(audio->ctx_a.sbuffer_len * audio->audio_channels * AUDIO_BPS);
 			if (audio->ctx_a.sbuffer == NULL) {
 				D0(fprintf(debug_file, "error: can not allocate buffer for audio samples: %s\n", strerror(errno)));
 				break;
@@ -170,14 +170,16 @@ void audio_start(struct audio *audio)
 */
 void audio_process(struct audio *audio)
 {
-	int slen;
+	snd_pcm_sframes_t slen;
 	int counter = 0;
 	void *_buf;
-	int _buf_len;
+	long _buf_len;
 	struct timeval tv_sys;
 	snd_timestamp_t ts;
 	snd_pcm_status_t *status;                                   // allocated on stack, do not free
+	assert(audio->write_samples);
 	if (audio->audio_enable == 0)
 		return;
@@ -193,7 +195,7 @@ void audio_process(struct audio *audio)
 		snd_pcm_status(audio->ctx_a.capture_hnd, status);
 		snd_pcm_status_get_tstamp(status, &ts);
 		avail = snd_pcm_status_get_avail(status);
-		int to_read = audio->ctx_a.sbuffer_len;                 // length in samples
+		snd_pcm_uframes_t to_read = audio->ctx_a.sbuffer_len;                 // length in samples
 		if (audio->ctx_a.rem_samples < 0)
 			audio->ctx_a.rem_samples = 0;
 		if (avail >= audio->ctx_a.sbuffer_len && audio->ctx_a.rem_samples == 0)
@@ -214,7 +216,7 @@ void audio_process(struct audio *audio)
 			}
 		}
 		if (to_push_flag) {
-			slen = snd_pcm_readi(audio->ctx_a.capture_hnd, (void *)(audio->ctx_a.sbuffer + AUDIO_SBUFFER_PREFIX), to_read);
+			slen = snd_pcm_readi(audio->ctx_a.capture_hnd, (void *)audio->ctx_a.sbuffer, to_read);
 			if (slen > 0) {
 				int flag = 1;
 				long offset = 0;
@@ -238,12 +240,12 @@ void audio_process(struct audio *audio)
 						break;
 					}
 				}
-				// we need to skip some samples in a new session, but if we just switch the frames than
+				// we need to skip some samples in a new session, but if we just switch the frames then
 				// we need to split new samples in the buffer into two parts - for the previous file,
 				// and the next one...
 				// so we can just save in the first file new data, and in the next use "skip_samples" field
 				if (audio->ctx_a.audio_skip_samples != 0) {
-					D5(fprintf(debug_file, "skip_samples = %lld, available samples = %d\n", audio->ctx_a.audio_skip_samples, slen));
+					D5(fprintf(debug_file, "skip_samples = %lld, available samples = %ld\n", audio->ctx_a.audio_skip_samples, slen));
 					if (audio->ctx_a.audio_skip_samples >= slen) {
 						audio->ctx_a.audio_skip_samples -= slen;
 						flag = 0;
@@ -255,11 +257,10 @@ void audio_process(struct audio *audio)
 				if (flag) {
 					long samples = slen - offset;
 					audio->ctx_a.audio_count += samples;
-					_buf = (void *) (audio->ctx_a.sbuffer + AUDIO_SBUFFER_PREFIX);
+					_buf = (void *)audio->ctx_a.sbuffer;
-					_buf = (void *) ((char *) _buf + offset * 2 * audio->audio_channels);
+					_buf = (void *)((char *) _buf + offset * AUDIO_BPS * audio->audio_channels);
-					_buf_len = samples * 2 * audio->audio_channels;
+					_buf_len = samples * AUDIO_BPS * audio->audio_channels;
-					assert(audio->write_samples);
+					audio->write_samples(audio, _buf, _buf_len, samples);
-					audio->write_samples(_buf, _buf_len, samples);
 					float tr = 1.0 / audio->audio_rate;
 					float l = tr * audio->ctx_a.audio_count;
@@ -274,6 +275,7 @@ void audio_process(struct audio *audio)
 				}
 			}
 		} else {
+			D3(fprintf(debug_file, "error reading from ALSA buffer, error code %ld\n", slen));
 			break;
 		}
 	}

--- a/src/camogm_audio.h
+++ b/src/camogm_audio.h
@@ -27,14 +27,15 @@
 #define SAMPLE_RATE               44100
 #define SAMPLE_CHANNELS           2
-#define SAMPLE_TIME               200
+#define SAMPLE_TIME               200                           ///< restrict ALSA to have this period, in milliseconds
+#define BUFFER_TIME               1000                          ///< approximate ALSA buffer duration, in milliseconds
 #define DEFAULT_SND_DEVICE        "plughw:0,0"
-#define AUDIO_SBUFFER_PREFIX      16
 #define AUDIO_CHANNELS_MIN        1
 #define AUDIO_CHANNELS_MAX        2
 #define AUDIO_RATE_MIN            11025
 #define AUDIO_RATE_MAX            44100
 #define DEFAULT_AUDIO_VOLUME      0xffff
+#define AUDIO_BPS                 2                             ///< bytes per sample for a single channel (can be 1 or 2)
 struct context_audio {
 	char *sbuffer;                                              ///< buffer for audio samples
@@ -78,7 +79,7 @@ struct audio {
 	int frame_period;                                           ///< video frame period, used to calculate time stamps for audio samples
 	void (*get_fpga_time)(const struct audio *audio, struct timeval *tv);//< callback function which can get FPGA time
-	int (*write_samples)(void *buff, int len, int slen);        ///< callback function which actually write data to file, this must be set
+	int (*write_samples)(struct audio *audio, void *buff, long len, long slen); ///< callback function which actually write data to file, this must be set
 	                                                            ///< in the camogm_init_* function when appropriate format is selected
 };

--- a/src/camogm_mov.c
+++ b/src/camogm_mov.c
@@ -32,7 +32,7 @@
 #define QUICKTIME_MIN_HEADER      0x300
 /** @brief The length in bytes of sample-to-chunk table entry as defined in QuickTime format specification */
 #define S2C_ENTRY_LEN             12
-/** @brief The number of entries in sample-to-chunk table */
+/** @brief The number of entries in sample-to-chunk table. See camogm_start_mov for the reason why we need 3 entries. */
 #define S2C_ENTRIES               3
 // for the parser
@@ -76,7 +76,7 @@ int quicktime_template_parser(camogm_state *state,
 void putBigEndian(unsigned long d, int l);
 int parse_special(camogm_state *state);
 int parse(camogm_state *state, int top);
-static int camogm_audio_mov(void *buff, int len, int slen);
+static int camogm_audio_mov(struct audio *audio, void *buff, long len, long slen);
 static inline bool is_audio_frame(unsigned long len);
 static inline void mark_audio(unsigned long *len);
 static inline void unmark_audio(unsigned long *len);
@@ -146,6 +146,12 @@ int camogm_start_mov(camogm_state *state)
 	state->frame_index = 0;
 	if (audio->audio_enable) {
+		/* Allocate memory for sample-to-chunk buffers. For simplicity, all audio chunks must be the same size and
+		 * we enforce this by reading from ALSA buffer (see camogm_audio.c ) only when it contains the appropriate
+		 * number of samples. Such approach simplifies the building of sample-to-chunk atoms, although there are
+		 * two corner cases: the first and the last chunks in file can contain different number of samples, thus we
+		 * need 3 entries in total (first, last and all in between). That is why S2C_ENTRIES = 3.
+		 */
 		audio->audio_samples_to_chunk = malloc(S2C_ENTRY_LEN * S2C_ENTRIES);
 		if (!audio->audio_samples_to_chunk) {
 			return -CAMOGM_FRAME_MALLOC;
@@ -168,7 +174,7 @@ int camogm_start_mov(camogm_state *state)
 	 */
 	data_offset = QUICKTIME_MIN_HEADER + 16;
 	data_offset += 4 * state->max_frames;                       // space for sample size atom - video
-	data_offset += 4 * state->max_frames;                       // space for chunk offsets atom - video
+	data_offset += (4 * state->max_frames) / state->frames_per_chunk; // space for chunk offsets atom - video
 	if (audio->audio_enable) {
 		data_offset += 4 * state->max_frames;                   // space for chunk offsets atom - audio
 		data_offset += S2C_ENTRY_LEN * S2C_ENTRIES;             // space for samples size atom - audio
@@ -188,7 +194,7 @@ int camogm_start_mov(camogm_state *state)
 */
 int camogm_frame_mov(camogm_state *state)
 {
-	int ret;
+	int ret = 0;
 	int i, j;
 	ssize_t iovlen, l;
 	struct iovec chunks_iovec[7];
@@ -218,17 +224,43 @@ int camogm_frame_mov(camogm_state *state)
 /**
 * Write audio samples to file.
- * @param buff
+ * @param[in]   buff   pointer to buffer containing audio samples
- * @param len
+ * @param[in]   len    the size of buffer, in bytes
- * @param slen
+ * @param[in]   slen   the number of audio samples in buffer
- * @return
+ * @return      0 if data was recorded successfully and negative error code otherwise
 */
-static int camogm_audio_mov(void *buff, int len, int slen)
+static int camogm_audio_mov(struct audio *audio, void *buff, long len, long slen)
 {
 	int ret_val = 0;
+	unsigned long k;
+	ssize_t wr_len;
+	camogm_state *state = container_of(audio, camogm_state, audio);
 	D6(fprintf(debug_file, "write audio sample, len = %d, slen = %d\n", len, slen));
+	wr_len = write(state->ivf, buff, len);
+	if (wr_len < len) {
+		D0(fprintf(debug_file, "audio samples write error: %s; returned %d, expected %d\n", strerror(errno), wr_len, len));
+		close(state->ivf);
+		state->ivf = -1;
+		return CAMOGM_FRAME_FILE_ERR;
+	}
+	k = len;
+	mark_audio(&k);
+	state->frame_lengths[state->frame_index] = k;
+	state->frame_index++;
+	if (audio->audio_samples_to_chunk[0] == -1) {
+		// this slot contains the number of samples in first chunk in file
+		audio->audio_samples_to_chunk[0] = slen;
+	} else {
+		// these slots contain the number of samples in the last and in the one before last chunks
+		audio->audio_samples_to_chunk[1] = audio->audio_samples_to_chunk[2];
+		audio->audio_samples_to_chunk[2] = slen;
+	}
+	audio->audio_frameno++;
+	audio->audio_samples += slen;
 	return ret_val;
 }
@@ -350,7 +382,7 @@ int parse_special(camogm_state *state)
 	if (strcmp(str, "frame_duration") == 0) {
 		putBigEndian(sample_dur, 4); return 0;
 	}
-	if (strcmp(str, "samples_chunk") == 0) {
+	if (strcmp(str, "samples_chunk") == 0) {                    // 'stsc' video atom
 		putBigEndian(samplesPerChunk, 4); return 0;
 	}                                                           // will put zeroes on pass 1
@@ -383,15 +415,17 @@ int parse_special(camogm_state *state)
 		putBigEndian(state->audio.audio_channels * 2, 4);
 		return 0;
 	}
-	if (strcmp(str, "audio_stsz") == 0)  {
+	if (strcmp(str, "audio_stsz") == 0) {
 		putBigEndian(state->audio.audio_channels * 2, 4);
-		putBigEndian(state->audio.audio_samples, 4);
+		/* sample size table in 'stsz' atom contains entry for every sample, sound samples are
+		 * all the same size thus this table is not needed - put 0 as the number of entries here
+		 */
+		putBigEndian(0, 4);
 		return 0;
 	}
-	if (strcmp(str, "audio_stco") == 0)  {	// (4 + 4 * chunk_count) bytes
+	if (strcmp(str, "audio_stco") == 0) {
 		long offset = 0;
 		n = state->audio.audio_frameno;
-//fprintf(stderr, "chunk_offsets; n == %d; nframes == %d; samplesPerChunk == %d\n", n, nframes, samplesPerChunk);
 		putBigEndian(n, 4);
 		j = 0;
 		for (i = 0; i < state->frame_index; i++) {
@@ -408,18 +442,19 @@ int parse_special(camogm_state *state)
 			D0(fprintf(debug_file, "Error MOV: wrong records for \"audio_stco\", have written %d, need to write %d\n", j, n));
 		return 0;
 	}
-	// TODO!!!
 	if (strcmp(str, "audio_stsc") == 0) {
 		n = 0;
-		for (i = 0; i < S2C_ENTRIES; i++) {
+		for (int entry = 0; entry < S2C_ENTRIES; entry++) {
-			if (state->audio.audio_samples_to_chunk[i] != -1) {
+			if (state->audio.audio_samples_to_chunk[entry] != -1) {
 				n++;
 			}
 		}
 		putBigEndian(n, 4);
+		// first table entry refers to first audio chunk in file
 		putBigEndian(1, 4);
 		putBigEndian(state->audio.audio_samples_to_chunk[0], 4);
-		putBigEndian(01, 4); // TODO: 02 ???
+		putBigEndian(01, 4);
+		// second table entry, most chunks in file refer here
 		n = 2;
 		if (state->audio.audio_samples_to_chunk[1] != -1) {
 			putBigEndian(n, 4);
@@ -427,6 +462,7 @@ int parse_special(camogm_state *state)
 			putBigEndian(01, 4);
 			n = state->audio.audio_frameno;
 		}
+		// last table entry corresponds to the last audio chunk in file
 		if (state->audio.audio_samples_to_chunk[2] != -1) {
 			putBigEndian(n, 4);
 			putBigEndian(state->audio.audio_samples_to_chunk[2], 4);
@@ -434,10 +470,6 @@ int parse_special(camogm_state *state)
 		}
 		return 0;
 	}
-	if (strcmp(str, "audio_samples_chunk") == 0) {
-		putBigEndian(1, 4);
-		return 0;
-	}                                                           // will put zeroes on pass 1
 	if (strcmp(str, "sample_sizes") == 0) {                     // 'stsz' video atom
 		// index for video stream only, audio index is build separately
 		j = 0;
@@ -449,7 +481,7 @@ int parse_special(camogm_state *state)
 			}
 		}
 		if (j != nframes)
-			D0(fprintf(debug_file, "Error MOV: wrong records for \"samples_sizes\": have write: %d, need to write: %d\n", j, n));
+			D0(fprintf(debug_file, "Error MOV: wrong records for \"samples_sizes\": have write: %d, need to write: %d\n", j, nframes));
 		return 0;
 	}
 	if (strcmp(str, "chunk_offsets") == 0) {                    // 'stco' video atom
@@ -602,7 +634,7 @@ int quicktime_template_parser( camogm_state *state,
 	lseek(ofd, 0, SEEK_SET);
 	audio_timescale = state->audio.audio_rate;
-	audio_rate = audio_timescale;
+	audio_rate = audio_timescale;                               // QuickTime defines sample rate as unsigned 16.16 fixed-point number
 	audio_rate <<= 16;
 	audio_duration = state->audio.audio_samples;
 	audio_channels = state->audio.audio_channels;

--- a/src/qt_audio
+++ b/src/qt_audio
@@ -177,7 +177,7 @@
 		 !time # Creation time
 		 !time # Modification time
 !audio_timescale # Time Scale - 44100				// ???
-!audio_duration # Duration							// ???
+!audio_duration # Duration in units of the time scale
 		  0000 # Language
 		  0000 # Quality
 		} # 'mdhd
@@ -187,7 +187,6 @@
 		'mhlr  # Component type
 		'soun  # Component subtype
 	  00000000 # Component manufacturer
-#		'niko  # Component manufacturer
 	  00000000 # Component Flags
 	  00000000 # Component Flags Mask
 			00 # Component name
@@ -233,8 +232,6 @@
 				000000 # Flags
 			  00000001 # Number of entries
 					 {'sowt # 16 bit LE, Data format
-#					  0001 # Version
-#					  0000 # Revision
 			 00000000 0000 # (reserved)
 					  0001 # Data reference index
 					  0001 # Version
@@ -268,15 +265,15 @@
 				000000 # Flags
 		   !audio_stsc #
 #			  00000000 # Number of entries
-#[			  00000000 # first chunk
+#[			  00000000 # the first chunk number using this table entry (it starts from 1 in video)
-#...		  00000000 # samples per chunk
+#...		  00000000 # the number of samples per chunk
-#]			  00000000 # samples description ID - 01 or 02 ?
+#]			  00000000 # sample description ID; defines the entry in stsd table which describes this chunk (it starts from 1 in video)
 				}
 				{'stsz	# Sample size atom
 					00 # version
 				000000 # Flags
-		   !audio_stsz # Sample Size - 2 for Mono, 4 for Stereo - ?
+		   !audio_stsz # Sample Size - 2 for Mono, 4 for Stereo
-#			  00000000 # Number of entries - empty
+#			  00000000 # Number of entries; this table should be empty if all the samples are the same size
 				}
 				{'stco	# Chunk offset atom
 					00 # version