When you anchor a file to a blockchain, you're not storing the file itself. You're storing a hash that proves the file existed at that moment. ProofAnchor anchors SHA-256 hashes to the Polygon blockchain, with the file never leaving your machine. But how do you prove your specific file hash is actually included in that blockchain transaction? That's where Merkle proofs come in.
The Merkle tree structure
Think of a Merkle tree like a tournament bracket, but for hashes. Your file hash sits at a "leaf" position. Each level up combines pairs of hashes until you reach a single "root" hash. That root hash is what gets stored on the blockchain, so one transaction can stand in for every leaf hash beneath it.
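The proof gives you a path from your leaf to the root: at each level, the "sibling" hash your current hash was paired with, and whether that sibling sits on the left or the right. You don't need the rest of the tree. If hashing your way up that path reproduces the same root hash the blockchain shows, your file hash was included.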
Let's see this in action with real code:
pip install verify-proof
Manual Merkle proof validation
Here's how to manually walk a Merkle proof using the same logic verify-proof uses internally:
import hashlib
from verify_proof import hash_file, verify_proof, load_proof
def validate_merkle_proof_manual(file_hash, merkle_path, *, verbose=True):
    """
    Manually validate a Merkle proof by walking from leaf to root.

    At every step the sibling hash is concatenated with the running hash
    (sibling first when the step's position is "left", running hash first
    for any other position), the combined hex string is decoded to raw
    bytes, and the SHA-256 of those bytes becomes the new running hash.

    Args:
        file_hash: Hex-encoded leaf hash to start from.
        merkle_path: Iterable of dict-like steps, leaf-most first. Each step
            is read with ``.get("hash", "")`` and
            ``.get("position", "right")``.
        verbose: When true (the default), print every intermediate value.
            Pass ``verbose=False`` to compute the root silently.

    Returns:
        The hex digest reached after the last step, or ``file_hash``
        unchanged when ``merkle_path`` is empty.

    Raises:
        ValueError: If a combined string is not valid hex (raised by
            ``bytes.fromhex``).
    """
    def log(message):
        # Single choke point so the walk itself stays free of output checks.
        if verbose:
            print(message)

    current = file_hash
    log(f"Starting with file hash: {current}")

    for i, step in enumerate(merkle_path):
        sibling = step.get("hash", "")
        position = step.get("position", "right")

        log(f"\nStep {i + 1}:")
        log(f" Current: {current}")
        log(f" Sibling: {sibling}")
        log(f" Position: {position}")

        # Concatenation order matters: swapping the operands yields a
        # different digest, so the recorded position must be honoured.
        if position == "left":
            combined = sibling + current  # sibling left, current right
        else:
            combined = current + sibling  # current left, sibling right

        # Hash the bytes the hex string encodes, not the hex text itself.
        current = hashlib.sha256(bytes.fromhex(combined)).hexdigest()

        log(f" Combined: {combined}")
        log(f" New hash: {current}")

    log(f"\nFinal root: {current}")
    return current
# Demo: walk a hand-written proof and compare against the package's answer.
# The sibling hashes and tx_id below are short placeholder values.
file_hash = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
merkle_path = [
    {"position": "right", "hash": "a1b2c3d4e5f6"},
    {"position": "left", "hash": "f6e5d4c3b2a1"},
]

# Assemble the proof record around the leaf hash and path defined above.
sample_proof = {
    "hash": file_hash,
    "algorithm": "sha256",
    "merkle_path": merkle_path,
    "blockchain": "polygon",
    "tx_id": "0x1234567890abcdef",
    "anchored_at": "2024-03-15T10:30:00Z",
    "service": "ProofAnchor",
}

# Root computed step by step with the manual walker
manual_root = validate_merkle_proof_manual(file_hash, merkle_path)

# Root as reported by the verify-proof package ("" when it reports none)
result = verify_proof(file_hash, sample_proof)
package_root = result.get("merkle_root", "")

# One call, three lines: same output as three separate prints.
print(
    f"\nManual calculation: {manual_root}",
    f"Package calculation: {package_root}",
    f"Match: {manual_root == package_root}",
    sep="\n",
)
Validating real proof files
In practice, you'll have actual .proof.json files to validate. Here's how to process them:
import json
import os
from pathlib import Path
def validate_document_proofs(directory_path):
    """
    Find all .proof.json files in a directory and validate them
    against their corresponding documents.

    A proof named ``<document>.proof.json`` is paired with ``<document>``
    in the same directory. Only the trailing ``.proof.json`` is removed, so
    a document whose own name contains that text is still matched.

    Args:
        directory_path: Directory to scan (not recursive).

    Returns:
        A list with one dict per proof whose document exists, in sorted
        proof-file order. Successful runs carry "file", "verified",
        "blockchain", "anchored_at" and "merkle_verified"; runs that raised
        carry "file", "verified" (False) and "error". Proofs without a
        matching document are reported with a warning and left out.
    """
    proof_suffix = ".proof.json"
    directory = Path(directory_path)
    results = []

    # Sorted so the report and the returned list have a stable order.
    for proof_path in sorted(directory.glob(f"*{proof_suffix}")):
        # Strip only the trailing suffix; str.replace would also delete
        # any earlier ".proof.json" inside the document's own name.
        document_name = proof_path.name[: -len(proof_suffix)]
        document_path = directory / document_name

        if not document_path.exists():
            print(f"Warning: No document found for {proof_path.name}")
            continue

        try:
            # load_proof / hash_file / verify_proof come from the
            # verify-proof package; verify_proof's result is used as a dict.
            proof_data = load_proof(str(proof_path))
            actual_hash = hash_file(str(document_path))
            result = verify_proof(actual_hash, proof_data)
            record = {
                "file": document_name,
                "verified": result["verified"],
                "blockchain": result.get("blockchain", "unknown"),
                "anchored_at": result.get("anchored_at", "unknown"),
                "merkle_verified": result.get("merkle_verified", False),
            }
        except Exception as e:
            # Per-file boundary: one bad proof must not abort the batch.
            print(f"Error processing {proof_path.name}: {e}")
            results.append({
                "file": document_name,
                "verified": False,
                "error": str(e),
            })
            continue

        # Reporting happens after the try so that a display problem can
        # never add a second, contradictory record for the same file.
        results.append(record)
        print(f"{document_name}: {'✓' if record['verified'] else '✗'}")
        if not record["verified"]:
            print(f" Error: {result.get('error', 'Unknown error')}")
        else:
            print(f" Anchored: {record['anchored_at']}")
            if "merkle_root" in result:
                # str() keeps the preview safe if the root is not a string.
                print(f" Merkle root: {str(result['merkle_root'])[:16]}...")

    return results
# Example usage
# results = validate_document_proofs("./documents")
What's next
Merkle proofs are the mathematical foundation that makes blockchain timestamps trustworthy. The verify-proof package handles all this complexity for you, but understanding the underlying mechanics helps you debug edge cases and build confidence in the verification.
Check out the full source code to see how the package implements additional features like algorithm selection and error handling.