Code-Specification Cross-Validation

An API specification says tokens expire in 30 minutes, max 3 sessions, and MD5 must not be used. The implementation has 1-hour expiry, 5 sessions, and uses MD5 for session IDs. Parseltongue catches every divergence.

Files

"""
Demo: Code-Specification Cross-Validation

Scenario: An API specification says tokens expire in 30 minutes,
max 3 sessions, and MD5 must not be used. The implementation has
1-hour expiry, 5 sessions, and uses MD5 for session IDs.
Parseltongue catches every divergence.

Intentional mismatches (the point of the demo):
- Token expiry: spec says 1800s (30 min), impl uses 3600s (60 min)
- Session limit: spec says max 3, impl allows 5
- MD5 usage: spec prohibits MD5, impl uses it for session IDs
"""

import json
import logging
import os
import sys

from parseltongue.core import System, load_source


def _print_list(items):
    for item in items:
        if isinstance(item, dict):
            origin = item.get("origin", "")
            tag = str(origin) if hasattr(origin, "is_grounded") else f"[origin: {origin}]"
            print(f"  {item['name']} = {item['wff']} {tag}")
        else:
            print(f"  {item}")


def main():
    plog = logging.getLogger("parseltongue")
    plog.setLevel(logging.WARNING)
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("  [%(levelname)s] %(message)s"))
    plog.addHandler(handler)

    s = System(overridable=True)
    print("=" * 60)
    print("Parseltongue — Code-Specification Cross-Validation")
    print("=" * 60)

    # ----------------------------------------------------------
    # Phase 0: Load both documents
    # ----------------------------------------------------------
    print("\n--- Phase 0: Load specification and implementation ---")
    doc_dir = os.path.join(os.path.dirname(__file__), "resources")
    s.load_document("Spec", os.path.join(doc_dir, "spec.txt"))
    s.load_document("Implementation", os.path.join(doc_dir, "implementation.txt"))
    print(f"  Loaded {len(s.documents)} documents")

    # ----------------------------------------------------------
    # Phase 1: Extract facts from the specification
    # ----------------------------------------------------------
    print("\n--- Phase 1: Facts from specification ---")

    load_source(
        s,
        """
        (fact spec-token-expiry 1800
          :evidence (evidence "Spec"
            :quotes ("Default token lifetime MUST be 1800 seconds (30 minutes).")
            :explanation "Specified default token lifetime"))

        (fact spec-max-sessions 3
          :evidence (evidence "Spec"
            :quotes ("Maximum 3 concurrent sessions per user.")
            :explanation "Specified session limit"))

        (fact spec-hash-algorithm "sha256"
          :evidence (evidence "Spec"
            :quotes ("All hashing MUST use SHA-256. MD5 MUST NOT be used anywhere.")
            :explanation "Required hashing algorithm"))

        (fact spec-token-length 64
          :evidence (evidence "Spec"
            :quotes ("Generated tokens MUST be 64 characters (hex-encoded).")
            :explanation "Required token format"))

        (fact spec-md5-forbidden true
          :evidence (evidence "Spec"
            :quotes ("MD5 MUST NOT be used anywhere.")
            :explanation "Security requirement prohibiting MD5"))
    """,
    )

    _print_list(s.list_facts())

    # ----------------------------------------------------------
    # Phase 2: Extract facts from the implementation
    # ----------------------------------------------------------
    print("\n--- Phase 2: Facts from implementation ---")

    load_source(
        s,
        """
        (fact impl-token-expiry 3600
          :evidence (evidence "Implementation"
            :quotes ("TOKEN_EXPIRY = 3600  # seconds")
            :explanation "Actual token expiry in code"))

        (fact impl-max-sessions 5
          :evidence (evidence "Implementation"
            :quotes ("MAX_SESSIONS = 5")
            :explanation "Actual session limit in code"))

        (fact impl-hash-algorithm "sha256"
          :evidence (evidence "Implementation"
            :quotes ("HASH_ALGORITHM = \\"sha256\\"")
            :explanation "Token hashing algorithm in code"))

        (fact impl-session-hash "md5"
          :evidence (evidence "Implementation"
            :quotes ("hashlib.md5(f\\"{user_id}:{now}\\".encode()).hexdigest()")
            :explanation "Session ID generation uses MD5"))

        (fact impl-token-length 64
          :evidence (evidence "Implementation"
            :quotes ("return len(token) == 64  # SHA-256 hex length")
            :explanation "Token validation checks for 64 chars"))
    """,
    )

    _print_list(s.list_facts())

    # ----------------------------------------------------------
    # Phase 3: Cross-validate with diffs
    # ----------------------------------------------------------
    print("\n--- Phase 3: Cross-validation diffs ---")

    load_source(
        s,
        """
        (diff expiry-check
            :replace spec-token-expiry
            :with impl-token-expiry)

        (diff session-limit-check
            :replace spec-max-sessions
            :with impl-max-sessions)

        (diff token-length-check
            :replace spec-token-length
            :with impl-token-length)
    """,
    )

    print("\n  Diff 1 — Token expiry (spec vs impl):")
    print(f"  {s.eval_diff('expiry-check')}")

    print("\n  Diff 2 — Max sessions (spec vs impl):")
    print(f"  {s.eval_diff('session-limit-check')}")

    print("\n  Diff 3 — Token length (spec vs impl):")
    print(f"  {s.eval_diff('token-length-check')}")

    # ----------------------------------------------------------
    # Phase 4: Derive spec violations
    # ----------------------------------------------------------
    print("\n--- Phase 4: Derive spec violations ---")

    load_source(
        s,
        """
        (derive expiry-violates-spec
            (!= impl-token-expiry spec-token-expiry)
            :using (impl-token-expiry spec-token-expiry))

        (derive session-limit-violates-spec
            (!= impl-max-sessions spec-max-sessions)
            :using (impl-max-sessions spec-max-sessions))

        (derive md5-violates-spec
            (and (= impl-session-hash "md5") spec-md5-forbidden)
            :using (impl-session-hash spec-md5-forbidden))

        (derive token-length-matches-spec
            (= impl-token-length spec-token-length)
            :using (impl-token-length spec-token-length))
    """,
    )

    print("  Spec violation theorems:")
    _print_list(s.list_theorems())

    # ----------------------------------------------------------
    # Phase 5: Provenance of a violation
    # ----------------------------------------------------------
    print("\n--- Phase 5: Provenance — why does expiry violate spec? ---")
    print(json.dumps(s.provenance("expiry-violates-spec"), indent=2))

    # ----------------------------------------------------------
    # Phase 6: Consistency report
    # ----------------------------------------------------------
    print("\n--- Phase 6: Consistency report ---")
    report = s.consistency()
    print(f"\n  {report}")

    # ----------------------------------------------------------
    # Summary
    # ----------------------------------------------------------
    print("\n" + "=" * 60)
    print("Divergences found:")
    print(f"  Token expiry:   spec={s.facts['spec-token-expiry'].wff}s, impl={s.facts['impl-token-expiry'].wff}s")
    print(f"  Max sessions:   spec={s.facts['spec-max-sessions'].wff}, impl={s.facts['impl-max-sessions'].wff}")
    print("  MD5 forbidden:  spec=True, impl uses MD5 for sessions")
    print(f"  Token length:   MATCHES (both {s.facts['spec-token-length'].wff})")
    print(f"\nFinal system: {s}")


if __name__ == "__main__":
    main()
"""Authentication module for the user service.

Handles token generation, validation, and session management.
Tokens expire after 3600 seconds (1 hour) by default.
"""

import hashlib
import time
from typing import Optional

TOKEN_EXPIRY = 3600  # seconds
MAX_SESSIONS = 5
HASH_ALGORITHM = "sha256"


def generate_token(user_id: str, secret: str) -> str:
    """Generate an authentication token for the given user.

    Returns:
        A hex-encoded SHA-256 hash token.
    """
    timestamp = str(int(time.time()))
    payload = f"{user_id}:{timestamp}:{secret}"
    return hashlib.sha256(payload.encode()).hexdigest()


def validate_token(token: str, user_id: str, secret: str, max_age: int = TOKEN_EXPIRY) -> bool:
    """Validate a token against user credentials.

    Returns:
        True if the token is valid and not expired, False otherwise.
    """
    if not token or not user_id:
        return False
    return len(token) == 64  # SHA-256 hex length


def create_session(user_id: str, metadata: Optional[dict] = None) -> dict:
    """Create a new session for the user.

    If the user already has MAX_SESSIONS active sessions, the oldest
    session is revoked before creating a new one.
    """
    now = int(time.time())
    return {
        "session_id": hashlib.md5(f"{user_id}:{now}".encode()).hexdigest(),
        "user_id": user_id,
        "created_at": now,
        "expires_at": now + TOKEN_EXPIRY,
    }


def revoke_session(session_id: str) -> bool:
    """Revoke an active session by its ID.

    Returns:
        True if the session was found and revoked, False if not found.
    """
    return True
API Specification: User Authentication Service v2.1

1. Token Management

1.1 Token Generation
  - Tokens MUST be generated using SHA-256 hashing.
  - Token payload MUST include user_id, timestamp, and application secret.
  - Generated tokens MUST be 64 characters (hex-encoded).

1.2 Token Expiry
  - Default token lifetime MUST be 1800 seconds (30 minutes).
  - Maximum allowed token lifetime is 7200 seconds.
  - Expired tokens MUST be rejected with a 401 status code.

1.3 Token Validation
  - Validation MUST check token format, expiry, and user binding.
  - Invalid tokens MUST return False, never raise exceptions.

2. Session Management

2.1 Session Creation
  - Each session MUST have a unique session_id.
  - Session MUST store user_id, created_at, and expires_at.
  - Sessions MUST expire at the same interval as tokens.

2.2 Session Limits
  - Maximum 3 concurrent sessions per user.
  - When the limit is reached, the oldest session MUST be revoked.

2.3 Session Revocation
  - revoke_session MUST return True on success, False if session not found.

3. Security Requirements

3.1 Hashing
  - All hashing MUST use SHA-256. MD5 MUST NOT be used anywhere.
  - Hash algorithm MUST be configurable via HASH_ALGORITHM constant.

3.2 Input Validation
  - All public functions MUST validate inputs before processing.
  - Empty or None values for user_id MUST be rejected.

Output

Click "Run in browser" to execute this demo with Pyodide.