Commit c934a7a2 authored by Rusty Russell's avatar Rusty Russell

crcsync change: better detection of final block.

parent 28f72798
......@@ -2,6 +2,7 @@
#include <ccan/crc/crc.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
void crc_of_blocks(const void *data, size_t len, unsigned int block_size,
unsigned int crcbits, uint32_t crc[])
......@@ -33,9 +34,14 @@ struct crc_context {
size_t total_bytes;
int have_match;
/* Final block is special (if a different size) */
size_t final_size;
uint32_t final_crc;
/* Uncrc tab. */
uint32_t uncrc_tab[256];
/* This doesn't count the last CRC. */
unsigned int num_crcs;
uint32_t crc[];
};
......@@ -59,13 +65,27 @@ static void init_uncrc_tab(uint32_t uncrc_tab[], unsigned int wsize)
}
struct crc_context *crc_context_new(size_t block_size, unsigned crcbits,
const uint32_t crc[], unsigned num_crcs)
const uint32_t crc[], unsigned num_crcs,
size_t final_size)
{
struct crc_context *ctx;
assert(num_crcs > 0);
assert(block_size > 0);
assert(final_size > 0);
assert(final_size <= block_size);
ctx = malloc(sizeof(*ctx) + sizeof(crc[0])*num_crcs);
if (ctx) {
ctx->block_size = block_size;
if (final_size != block_size) {
ctx->final_size = final_size;
ctx->final_crc = crc[--num_crcs];
} else {
/* If this is 0, we never compare against it. */
ctx->final_size = 0;
}
/* Technically, 1 << 32 is undefined. */
if (crcbits >= 32)
ctx->crcmask = 0xFFFFFFFF;
......@@ -103,6 +123,14 @@ static int crc_matches(const struct crc_context *ctx)
return -1;
}
static bool final_matches(const struct crc_context *ctx)
{
if (ctx->literal_bytes != ctx->final_size)
return false;
return (ctx->running_crc & ctx->crcmask) == ctx->final_crc;
}
static uint32_t crc_add_byte(uint32_t crc, uint8_t newbyte)
{
return crc32c(crc, &newbyte, 1);
......@@ -144,10 +172,16 @@ size_t crc_read_block(struct crc_context *ctx, long *result,
else
old = NULL;
while (!old || (crcmatch = crc_matches(ctx)) < 0) {
while (ctx->literal_bytes < ctx->block_size
|| (crcmatch = crc_matches(ctx)) < 0) {
if (consumed == buflen)
break;
/* Increment these now in case we hit goto: below. */
ctx->literal_bytes++;
ctx->total_bytes++;
consumed++;
/* Update crc. */
if (old) {
ctx->running_crc = crc_roll(ctx->running_crc,
......@@ -161,11 +195,13 @@ size_t crc_read_block(struct crc_context *ctx, long *result,
ctx->running_crc = crc_add_byte(ctx->running_crc, *p);
if (p == (uint8_t *)buf + ctx->block_size - 1)
old = buf;
/* We don't roll this csum, we only look for it after
* a block match. It's simpler and faster. */
if (final_matches(ctx)) {
crcmatch = ctx->num_crcs;
goto have_match;
}
}
ctx->literal_bytes++;
ctx->total_bytes++;
consumed++;
p++;
}
......@@ -180,8 +216,11 @@ size_t crc_read_block(struct crc_context *ctx, long *result,
} else {
have_match:
*result = -crcmatch-1;
ctx->literal_bytes -= ctx->block_size;
assert(ctx->literal_bytes == 0);
if (crcmatch == ctx->num_crcs)
assert(ctx->literal_bytes == ctx->final_size);
else
assert(ctx->literal_bytes == ctx->block_size);
ctx->literal_bytes = 0;
ctx->have_match = -1;
ctx->running_crc = 0;
/* Nothing more in the buffer. */
......@@ -219,37 +258,9 @@ size_t crc_read_block(struct crc_context *ctx, long *result,
return consumed;
}
/* We could try many techniques to match the final block. We can
* simply try to checksum whatever's left at the end and see if it
* matches the final block checksum. This works for the exact-match
* case.
*
* We can do slightly better than this: if we try to match the checksum
* on every block (starting with block_size 1) from where we end to EOF,
* we can capture the "data appended" case as well.
*/
static size_t final_block_match(struct crc_context *ctx)
{
size_t size;
uint32_t crc;
if (ctx->num_crcs == 0)
return 0;
crc = 0;
for (size = 0; size < buffer_size(ctx); size++) {
const uint8_t *p = ctx->buffer;
crc = crc_add_byte(crc, p[ctx->buffer_start+size]);
if ((crc & ctx->crcmask) == ctx->crc[ctx->num_crcs-1])
return size+1;
}
return 0;
}
long crc_read_flush(struct crc_context *ctx)
{
long ret;
size_t final;
/* We might have ended right on a matched block. */
if (ctx->have_match != -1) {
......@@ -263,20 +274,11 @@ long crc_read_flush(struct crc_context *ctx)
return ret;
}
/* Look for truncated final block. */
final = final_block_match(ctx);
if (!final) {
/* Nope? Just a literal. */
ret = buffer_size(ctx);
ctx->buffer_start += ret;
ctx->literal_bytes -= ret;
return ret;
}
/* We matched (some of) what's left. */
ret = -((int)ctx->num_crcs-1)-1;
ctx->buffer_start += final;
ctx->literal_bytes -= final;
/* The rest is just a literal. */
ret = buffer_size(ctx);
assert(ctx->literal_bytes == ret);
ctx->buffer_start = ctx->buffer_end = 0;
ctx->literal_bytes = 0;
return ret;
}
......
......@@ -23,12 +23,14 @@ void crc_of_blocks(const void *data, size_t len, unsigned int blocksize,
* @crcbits: the bits valid in the CRCs (<= 32)
* @crc: array of block crcs
* @num_crcs: number of block crcs
* @final_size: the final block size (<= blocksize).
*
* Returns an allocated pointer to the structure for crc_find_block,
* or NULL. Makes a copy of @crc and @num_crcs.
*/
struct crc_context *crc_context_new(size_t blocksize, unsigned crcbits,
const uint32_t crc[], unsigned num_crcs);
const uint32_t crc[], unsigned num_crcs,
size_t final_size);
/**
* crc_read_block - search for block matches in the buffer.
......
......@@ -39,26 +39,30 @@ int main(int argc, char *argv[])
"pqr-a-very-long-test-that-differs-between-two-invokations-of-the-same-page-st"
/* MATCH */
"uvwxy" "z ABC" "DEFGH" "IJKLM" "NOPQR" "STUVW" "XYZ 0" "12345"
"6789"
/* NO MATCH */
"6789ab";
"ab";
int expected[] = { 4,
-2, -3,
77,
-5, -6, -7, -8, -9, -10, -11, -12,
6 };
-13,
2 };
crc_info_t crc_info1;
struct crc_context *crcctx;
long result;
size_t ndigested;
size_t offset = 0;
size_t len2 = strlen(data2);
size_t finalsize = strlen(data1) % BLOCKSIZE ?: BLOCKSIZE;
int expected_i = 0;
plan_tests(ARRAY_SIZE(expected) + 2);
crcblocks(&crc_info1, data1, strlen(data1), BLOCKSIZE);
crcctx = crc_context_new(BLOCKSIZE, 30, crc_info1.crcs, crc_info1.block_count);
crcctx = crc_context_new(BLOCKSIZE, 30, crc_info1.crcs, crc_info1.block_count,
finalsize);
while ( offset < len2)
{
ndigested = crc_read_block(crcctx, &result, data2+offset, len2 - offset);
......
#include "crcsync/crcsync.h"
#include "../crcsync.c"
#include "tap/tap.h"
#include <stdlib.h>
#include <stdbool.h>
......@@ -64,14 +65,17 @@ static void test_sync(const char *buffer1, size_t len1,
const struct result results[], size_t num_results)
{
struct crc_context *ctx;
size_t used, ret, i, curr_literal;
size_t used, ret, i, curr_literal, finalsize;
long result;
uint32_t crcs[num_blocks(len1, block_size)];
crc_of_blocks(buffer1, len1, block_size, 32, crcs);
finalsize = len1 % block_size ?: block_size;
/* Normal method. */
ctx = crc_context_new(block_size, 32, crcs, ARRAY_SIZE(crcs));
ctx = crc_context_new(block_size, 32, crcs, ARRAY_SIZE(crcs),
finalsize);
curr_literal = 0;
for (used = 0, i = 0; used < len2; used += ret) {
......@@ -89,7 +93,8 @@ static void test_sync(const char *buffer1, size_t len1,
crc_context_free(ctx);
/* Byte-at-a-time method. */
ctx = crc_context_new(block_size, 32, crcs, ARRAY_SIZE(crcs));
ctx = crc_context_new(block_size, 32, crcs, ARRAY_SIZE(crcs),
finalsize);
curr_literal = 0;
for (used = 0, i = 0; used < len2; used += ret) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment