X-Git-Url: http://p2p-next.cs.pub.ro/gitweb/?a=blobdiff_plain;f=src%2Flibswift%2Fhashtree.h;fp=src%2Flibswift%2Fhashtree.h;h=a9827d03143cea22dcb354cc8a86688c9cc49c14;hb=45963a7511531cd1656ad5d3847d2dafd015c54d;hp=0000000000000000000000000000000000000000;hpb=d069796805ad79542fd7e4406d1e9c6d2d8c2ef7;p=swifty.git diff --git a/src/libswift/hashtree.h b/src/libswift/hashtree.h new file mode 100644 index 0000000..a9827d0 --- /dev/null +++ b/src/libswift/hashtree.h @@ -0,0 +1,165 @@ +/* + * hashtree.h + * hashing, Merkle hash trees and data integrity + * + * Created by Victor Grishchenko on 3/6/09. + * Copyright 2009-2012 TECHNISCHE UNIVERSITEIT DELFT. All rights reserved. + * + */ +#ifndef SWIFT_SHA1_HASH_TREE_H +#define SWIFT_SHA1_HASH_TREE_H +#include +#include +#include "bin.h" +#include "binmap.h" + +namespace swift { + +#define HASHSZ 20 + +/** SHA-1 hash, 20 bytes of data */ +struct Sha1Hash { + uint8_t bits[HASHSZ]; + + Sha1Hash() { memset(bits,0,HASHSZ); } + /** Make a hash of two hashes (for building Merkle hash trees). */ + Sha1Hash(const Sha1Hash& left, const Sha1Hash& right); + /** Hash an old plain string. */ + Sha1Hash(const char* str, size_t length=-1); + Sha1Hash(const uint8_t* data, size_t length); + /** Either parse hash from hex representation of read in raw format. */ + Sha1Hash(bool hex, const char* hash); + + std::string hex() const; + bool operator == (const Sha1Hash& b) const + { return 0==memcmp(bits,b.bits,SIZE); } + bool operator != (const Sha1Hash& b) const { return !(*this==b); } + const char* operator * () const { return (char*) bits; } + + const static Sha1Hash ZERO; + const static size_t SIZE = HASHSZ; +}; + +// Arno: The chunk size parameter can now be configured via the constructor, +// for values up to 8192. Above that you'll have to edit the +// SWIFT_MAX_SEND_DGRAM_SIZE in swift.h +// +#define SWIFT_DEFAULT_CHUNK_SIZE 1024 + + +/** This class controls data integrity of some file; hash tree is put to + an auxilliary file next to it. The hash tree file is mmap'd for + performance reasons. Actually, I'd like the data file itself to be + mmap'd, but 32-bit platforms do not allow that for bigger files. + + There are two variants of the general workflow: either a HashTree + is initialized with a root hash and the rest of hashes and data is + spoon-fed later, OR a HashTree is initialized with a data file, so + the hash tree is derived, including the root hash. + */ +class HashTree : Serializable { + /** Merkle hash tree: root */ + Sha1Hash root_hash_; + Sha1Hash *hashes_; + /** Merkle hash tree: peak hashes */ + Sha1Hash peak_hashes_[64]; + bin_t peaks_[64]; + int peak_count_; + /** File descriptor to put hashes to */ + int fd_; + int hash_fd_; + std::string filename_; // for easy serialization + /** Base size, as derived from the hashes. */ + uint64_t size_; + uint64_t sizec_; + /** Part of the tree currently checked. */ + uint64_t complete_; + uint64_t completec_; + /** Binmap of own chunk availability */ + binmap_t ack_out_; + + // CHUNKSIZE + /** Arno: configurable fixed chunk size in bytes */ + uint32_t chunk_size_; + + // LESSHASH + binmap_t is_hash_verified_; // binmap being abused as bitmap, only layer 0 used + // FAXME: make is_hash_verified_ part of persistent state? + + int internal_deserialize(FILE *fp,bool contentavail=true); + + //NETWVSHASH + bool check_netwvshash_; + +protected: + + void Submit(); + void RecoverProgress(); + bool RecoverPeakHashes(); + Sha1Hash DeriveRoot(); + bool OfferPeakHash (bin_t pos, const Sha1Hash& hash); + + +public: + + HashTree (const char* file_name, const Sha1Hash& root=Sha1Hash::ZERO, uint32_t chunk_size=SWIFT_DEFAULT_CHUNK_SIZE, + const char* hash_filename=NULL, bool force_check_diskvshash=true, bool check_netwvshash=true, const char* binmap_filename=NULL); + + // Arno, 2012-01-03: Hack to quickly learn root hash from a checkpoint + HashTree (bool dummy, const char* binmap_filename); + + /** Offer a hash; returns true if it verified; false otherwise. + Once it cannot be verified (no sibling or parent), the hash + is remembered, while returning false. */ + bool OfferHash (bin_t pos, const Sha1Hash& hash); + /** Offer data; the behavior is the same as with a hash: + accept or remember or drop. Returns true => ACK is sent. */ + bool OfferData (bin_t bin, const char* data, size_t length); + /** For live streaming. Not implemented yet. */ + int AppendData (char* data, int length) ; + + int file_descriptor () const { return fd_; } + /** Returns the number of peaks (read on peak hashes). */ + int peak_count () const { return peak_count_; } + /** Returns the i-th peak's bin number. */ + bin_t peak (int i) const { return peaks_[i]; } + /** Returns peak hash #i. */ + const Sha1Hash& peak_hash (int i) const { return peak_hashes_[i]; } + /** Return the peak bin the given bin belongs to. */ + bin_t peak_for (bin_t pos) const; + /** Return a (Merkle) hash for the given bin. */ + const Sha1Hash& hash (bin_t pos) const {return hashes_[pos.toUInt()];} + /** Give the root hash, which is effectively an identifier of this file. */ + const Sha1Hash& root_hash () const { return root_hash_; } + /** Get file size, in bytes. */ + uint64_t size () const { return size_; } + /** Get file size in chunks (in kilobytes, rounded up). */ + uint64_t size_in_chunks () const { return sizec_; } + /** Number of bytes retrieved and checked. */ + uint64_t complete () const { return complete_; } + /** Number of chunks retrieved and checked. */ + uint64_t chunks_complete () const { return completec_; } + /** The number of bytes completed sequentially, i.e. from the beginning of + the file, uninterrupted. */ + uint64_t seq_complete () ; + /** Whether the file is complete. */ + bool is_complete () + { return size_ && complete_==size_; } + /** The binmap of complete chunks. */ + binmap_t& ack_out () { return ack_out_; } + std::string filename() { return filename_; } // Arno + uint32_t chunk_size() { return chunk_size_; } // CHUNKSIZE + ~HashTree (); + + // Arno: persistent storage for state other than hashes (which are in .mhash) + int serialize(FILE *fp); + int deserialize(FILE *fp); + int partial_deserialize(FILE *fp); + + //NETWVSHASH + bool get_check_netwvshash() { return check_netwvshash_; } +}; + +} + +#endif