Add another hamming function. The former are for varbit parameters. The

later are for text parameters (like the other functions). Also, add another
operator ~@~.
parent 77917d6e
......@@ -129,3 +129,99 @@ Datum hamming_op(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(res >= pgs_hamming_threshold);
}
PG_FUNCTION_INFO_V1(hamming_text);
Datum
hamming_text(PG_FUNCTION_ARGS)
{
char *a, *b;
int alen, blen;
char *pa, *pb;
int maxlen;
float4 res = 0.0;
a = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(0))));
b = DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(PG_GETARG_TEXT_P(1))));
alen = strlen(a);
blen = strlen(b);
if (alen > PGS_MAX_STR_LEN || blen > PGS_MAX_STR_LEN)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument exceeds the maximum length of %d bytes",
PGS_MAX_STR_LEN)));
elog(DEBUG1, "alen: %d; blen: %d", alen, blen);
if (alen != blen)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("bit strings must have the same length")));
elog(DEBUG1, "a: %s ; b: %s", a, b);
/* alen and blen have the same length */
maxlen = alen;
pa = a;
pb = b;
while (*pa != '\0')
{
elog(DEBUG4, "a: %c ; b: %c", *pa, *pb);
/* are these bit strings? */
if ((*pa != '0' && *pa != '1') || (*pb != '0' && *pb != '1'))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("arguments must contain only 0 or 1")));
if (*pa++ != *pb++)
res++;
}
elog(DEBUG1, "is normalized: %d", pgs_hamming_is_normalized);
elog(DEBUG1, "maximum length: %d", maxlen);
elog(DEBUG1, "hammingdistance(%s, %s) = %.3f", DatumGetCString(a), DatumGetCString(b), res);
/* if one string has zero length then return one */
if (maxlen == 0)
{
PG_RETURN_FLOAT4(1.0);
}
else if (pgs_hamming_is_normalized)
{
res = 1.0 - (res / maxlen);
elog(DEBUG1, "hamming(%s, %s) = %.3f", DatumGetCString(a), DatumGetCString(b), res);
PG_RETURN_FLOAT4(res);
}
else
{
PG_RETURN_FLOAT4(res);
}
}
PG_FUNCTION_INFO_V1(hamming_text_op);
Datum hamming_text_op(PG_FUNCTION_ARGS)
{
float4 res;
/*
* store *_is_normalized value temporarily 'cause
* threshold (we're comparing against) is normalized
*/
bool tmp = pgs_hamming_is_normalized;
pgs_hamming_is_normalized = true;
res = DatumGetFloat4(DirectFunctionCall2(
hamming_text,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
/* we're done; back to the previous value */
pgs_hamming_is_normalized = tmp;
PG_RETURN_BOOL(res >= pgs_hamming_threshold);
}
......@@ -80,13 +80,21 @@ CREATE OR REPLACE FUNCTION hamming_op (varbit, varbit) RETURNS bool
AS 'MODULE_PATHNAME', 'hamming_op'
LANGUAGE C STABLE STRICT;
--CREATE OPERATOR ~@@ (
-- LEFTARG = text,
-- RIGHTARG = text,
-- PROCEDURE = hamming_op,
-- COMMUTATOR = '~@@',
-- JOIN = contjoinsel
--);
CREATE OR REPLACE FUNCTION hamming_text (text, text) RETURNS float4
AS 'MODULE_PATHNAME','hamming_text'
LANGUAGE C IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION hamming_text_op (text, text) RETURNS bool
AS 'MODULE_PATHNAME', 'hamming_text_op'
LANGUAGE C STABLE STRICT;
CREATE OPERATOR ~@~ (
LEFTARG = text,
RIGHTARG = text,
PROCEDURE = hamming_text_op,
COMMUTATOR = '~@~',
JOIN = contjoinsel
);
-- Jaccard
CREATE OR REPLACE FUNCTION jaccard (text, text) RETURNS float4
......
......@@ -214,6 +214,8 @@ extern Datum PGS_EXPORT euclidean(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT euclidean_op(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT hamming(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT hamming_op(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT hamming_text(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT hamming_text_op(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT jaccard(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT jaccard_op(PG_FUNCTION_ARGS);
extern Datum PGS_EXPORT jaro(PG_FUNCTION_ARGS);
......
......@@ -19,7 +19,9 @@ DROP OPERATOR ~!! (text, text);
DROP FUNCTION euclidean (text, text);
DROP FUNCTION euclidean_op (text, text);
--DROP OPERATOR ~@@ (text, text);
DROP OPERATOR ~@~ (text, text);
DROP FUNCTION hamming_text (text, text);
DROP FUNCTION hamming_text_op (text, text);
DROP FUNCTION hamming (varbit, varbit);
DROP FUNCTION hamming_op (varbit, varbit);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment