/* * Copyright (c) 2000-2006 All rights reserved * Alberto Reggiori * Dirk-Willem van Gulik * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by * Alberto Reggiori and * Dirk-Willem van Gulik ." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * * 5. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * 6. Products derived from this software may not be called "RDFStore" * nor may "RDFStore" appear in their names without prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * * This software consists of work developed by Alberto Reggiori and * Dirk-Willem van Gulik. The RDF specific part is based based on public * domain software written at the Stanford University Database Group by * Sergey Melnik. For more information on the RDF API Draft work, * please see * The DBMS TCP/IP server part is based on software originally written * by Dirk-Willem van Gulik for Web Weaving Internet Engineering m/v Enschede, * The Netherlands. * * $Id: rdfstore_digest.c,v 1.12 2006/06/19 10:10:21 areggiori Exp $ * */ #include #include "rdfstore_digest.h" #include "rdfstore_log.h" #include "rdfstore_serializer.h" /* #define RDFSTORE_DEBUG_DIGEST */ #ifdef RDFSTORE_DEBUG_DIGEST #define DIGEST_PRINT(dd) {\ int i = 0;\ printf("Statement digest at line %s:%d is '",__FILE__,__LINE__);\ for (i = 0; i < RDFSTORE_SHA_DIGESTSIZE; i++) {\ printf("%02X", dd[i]);\ };\ printf("'\n");\ } #else #define DIGEST_PRINT(dd) {} #endif int rdfstore_digest_digest(unsigned char *input, int len, unsigned char digest[RDFSTORE_SHA_DIGESTSIZE]) { RDFSTORE_AP_SHA1_CTX sha_info; rdfstore_ap_SHA1Init(&sha_info); rdfstore_ap_SHA1Update(&sha_info, input, len); rdfstore_ap_SHA1Final(digest, &sha_info); DIGEST_PRINT(digest); return 0; }; const char * rdfstore_digest_get_digest_algorithm() { return "SHA-1"; }; /* As it stands - this is a 32 bit (partial) hash - we are not using the full * 160 bots of a normal SHA1 operation */ static rdf_store_digest_t rdfstore_digest_crc64(unsigned char * dd) { if (dd == NULL) return 0; return (rdf_store_digest_t) htonl(*(uint32_t *) dd); } int rdfstore_digest_get_node_digest(RDF_Node * node, unsigned char dd[RDFSTORE_SHA_DIGESTSIZE], int unique) { unsigned char *input = NULL; int status = 0; int len = 0; if (node == NULL) return -1; if (node->type != 1) { if (node->value.resource.identifier == NULL) return -1; #ifdef RDFSTORE_DEBUG_DIGEST printf("get_node_digest( RESOURCE '%s')\n", node->value.resource.identifier); #endif len = node->value.resource.identifier_len; input = (unsigned char *) RDFSTORE_MALLOC( sizeof(unsigned char) * (len + 1)); /* also bNode bit flag below 1/0 */ if (input == NULL) return -1; memcpy(input, node->value.resource.identifier,len); if( node->type == 2 ) { memcpy(input+len, "1", 1); /* is bNode */ } else { memcpy(input+len, "0", 1); }; len++; } else if (node->type == 1) { int len_lang,len_dt; /* literals can be empty i.e. node->value.literal.string can be NULL */ #ifdef RDFSTORE_DEBUG_DIGEST printf("get_node_digest( LITERAL '%s')\n", node->value.literal.string); #endif len = (node->value.literal.string != NULL) ? node->value.literal.string_len : 0; len_lang=0; len_dt=0; if( unique ) { if (node->value.literal.lang != NULL) len_lang = strlen(node->value.literal.lang); if (node->value.literal.parseType == 1) len_dt = strlen(RDFSTORE_RDF_PARSETYPE_LITERAL); else if (node->value.literal.dataType != NULL) len_dt= strlen(node->value.literal.dataType); }; input = (unsigned char *) RDFSTORE_MALLOC( sizeof(unsigned char) * (len + len_lang + len_dt + 2)); /* the two double quotes signs to distinguish between resources and literals */ if (input == NULL) return -1; /* * the following assures that different digests are generated * for the same string for Literal and URI ref of a Resource * e.g. "http://www.google.com" and * would result in different digests */ memcpy(input, "\"", 1); if (node->value.literal.string != NULL) { memcpy(input+1, node->value.literal.string, len); }; memcpy(input+1+len, "\"", 1); /* keep the digest unique per xml:lang and rdf:datatype if requested */ if( unique ) { if (node->value.literal.lang != NULL) memcpy(input+1+len+1, node->value.literal.lang, len_lang); if (node->value.literal.parseType == 1) memcpy(input+1+len+1+len_lang, RDFSTORE_RDF_PARSETYPE_LITERAL, len_dt); else if (node->value.literal.dataType != NULL) memcpy(input+1+len+1+len_lang, node->value.literal.dataType, len_dt); }; len += len_lang + len_dt + 2; } else { return -1; }; status = rdfstore_digest_digest(input, len, dd); RDFSTORE_FREE(input); return status; }; /* * crc64 of an SHA-1 cryptographic hash - see Stanford API Draft and GUID * stuff */ rdf_store_digest_t rdfstore_digest_get_node_hashCode( RDF_Node * node, int unique ) { unsigned char dd[RDFSTORE_SHA_DIGESTSIZE]; rdf_store_digest_t hc = 0; if (node == NULL) return 0; #ifdef RDFSTORE_DEBUG_DIGEST if (node->hashcode) printf("Node hashcode for '%s' already carried out '%d'\n", (node->type != 1) ? node->value.resource.identifier : node->value.literal.string, node->hashcode); #endif if (node->hashcode) return node->hashcode; if ((rdfstore_digest_get_node_digest(node, dd, unique)) != 0) { hc = 0; } else { hc = rdfstore_digest_crc64(dd); }; return hc; }; int rdfstore_digest_get_statement_digest(RDF_Statement * statement, RDF_Node * given_context, unsigned char dd[RDFSTORE_SHA_DIGESTSIZE]) { unsigned char dds[RDFSTORE_SHA_DIGESTSIZE]; unsigned char ddp[RDFSTORE_SHA_DIGESTSIZE]; unsigned char ddo[RDFSTORE_SHA_DIGESTSIZE]; unsigned char ddc[RDFSTORE_SHA_DIGESTSIZE]; /* unsigned char ddn[RDFSTORE_SHA_DIGESTSIZE]; */ unsigned char *input = NULL; RDF_Node *context = NULL; int status = 0; if (statement == NULL) return -1; if (given_context == NULL) { if (statement->context != NULL) context = statement->context; } else { /* use given context instead */ context = given_context; }; if ((rdfstore_digest_get_node_digest(statement->subject, dds, 1)) != 0) return -1; DIGEST_PRINT(dds); if ((rdfstore_digest_get_node_digest(statement->predicate, ddp, 1)) != 0) return -1; DIGEST_PRINT(ddp); if ((rdfstore_digest_get_node_digest(statement->object, ddo, 1)) != 0) /* distinguish RDF literal hashcode by xml:lang or rdf:datatype */ return -1; DIGEST_PRINT(ddo); if (context != NULL) { if ((rdfstore_digest_get_node_digest(context, ddc, 1)) != 0) return -1; DIGEST_PRINT(ddc); input = (unsigned char *) RDFSTORE_MALLOC( sizeof(unsigned char) * (RDFSTORE_SHA_DIGESTSIZE * 4)); /* s,p,o,c */ } else { input = (unsigned char *) RDFSTORE_MALLOC( sizeof(unsigned char) * (RDFSTORE_SHA_DIGESTSIZE * 3)); /* s,p,o */ }; if (input == NULL) return -1; memcpy(input, dds, RDFSTORE_SHA_DIGESTSIZE); memcpy(input + RDFSTORE_SHA_DIGESTSIZE, ddp, RDFSTORE_SHA_DIGESTSIZE); if (statement->object->type == 1) { register int i; unsigned char c = ddo[0]; /* * rotate one byte - see why at * http://www-db.stanford.edu/~melnik/rdf/api.html#digest * even if it says rotate to the left why is the right :) */ for (i = 0; i < RDFSTORE_SHA_DIGESTSIZE - 1; i++) ddo[i] = ddo[i + 1]; ddo[RDFSTORE_SHA_DIGESTSIZE - 1] = c; }; memcpy(input + (2 * RDFSTORE_SHA_DIGESTSIZE), ddo, RDFSTORE_SHA_DIGESTSIZE); if (context != NULL) memcpy(input + (3 * RDFSTORE_SHA_DIGESTSIZE), ddc, RDFSTORE_SHA_DIGESTSIZE); status = rdfstore_digest_digest(input, (context != NULL) ? (RDFSTORE_SHA_DIGESTSIZE * 4) : (RDFSTORE_SHA_DIGESTSIZE * 3), dd); DIGEST_PRINT(dd); RDFSTORE_FREE(input); return status; } rdf_store_digest_t rdfstore_digest_get_statement_hashCode(RDF_Statement * statement, RDF_Node * given_context) { unsigned char dd[RDFSTORE_SHA_DIGESTSIZE]; rdf_store_digest_t hc = 0; if (statement == NULL) return 0; #ifdef RDFSTORE_DEBUG_DIGEST if (statement->hashcode) { char *ntriples_rep = rdfstore_ntriples_statement(statement, NULL); printf("Statement hashcode for '%s' already carried out '%d'\n", ntriples_rep, statement->hashcode); RDFSTORE_FREE(ntriples_rep); }; #endif if (statement->hashcode) return statement->hashcode; if ((rdfstore_digest_get_statement_digest(statement, given_context, dd)) != 0) { hc = 0; } else { /* * perhaps it is instead => s.hashCode() * 7) + p.hashCode()) * * 7 + o.hashCode() + c.hashCode() */ hc = rdfstore_digest_crc64(dd); }; #ifdef RDFSTORE_DEBUG_DIGEST { char *ntriples_rep = rdfstore_ntriples_statement(statement, NULL); printf("Just computed statement hashcode for '%s' to '%d' %s\n", ntriples_rep, hc, (given_context != NULL) ? "(not to be cached)" : ""); RDFSTORE_FREE(ntriples_rep); }; #endif return hc; }