Protocol Extension for Automation Control

Liao Peiyuan Condé Nast United States of America Email: peiyuan_liao@condenast.com · protocol

request-limit: 60/minute
concurrent-limit: 5

allowed-automations: webdriver, headless

api-automation: with-key-only
allow-xhr: read-only
disallow-fetch-from: /account/*, /checkout/*, /admin/*

require-human-initiated-session: true
session-validation: cookie-based
session-ttl: 1h

<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "AutomationPolicyAnnotation",
  "allowedAutomations": [],
  "allowedPurposes": []
}
</script>

automation-preferences = *( group )

group            = 1*scope-directive           ; at least one <scope>
                   *( directive / emptyline )
                   1*emptyline                 ; blank line terminates group

directive        = scope-directive / host-directive /
                   user-agent-directive /
                   method-directive / purpose-directive /
                   ; Extended rule: Added extension directives
                   request-limit-directive / concurrent-limit-directive /
                   allowed-automations-directive / api-automation-directive /
                   allow-xhr-directive / disallow-fetch-from-directive /
                   require-human-session-directive / session-validation-directive /
                   session-ttl-directive

; --- Core directives (from CORE-SPEC) ---------------------

scope-directive   = *WS "scope"            *WS ":" *WS url-pattern    EOL
host-directive    = *WS "host"             *WS ":" *WS host-pattern   EOL
method-directive  = *WS "allowed-methods"  *WS ":" *WS method-list    EOL
purpose-directive = *WS "allowed-purposes" *WS ":" *WS purpose-list   EOL
user-agent-directive  = *WS "user-agent" *WS ":" *WS product-token
                        *( *WS "," *WS product-token ) EOL

; --- Extension directives ---------------------------------

request-limit-directive = *WS "request-limit"  *WS ":" *WS rate-spec EOL
concurrent-limit-directive = *WS "concurrent-limit" *WS ":" *WS count EOL
allowed-automations-directive = *WS "allowed-automations" *WS ":" *WS automation-list EOL
api-automation-directive = *WS "api-automation" *WS ":" *WS api-automation-value EOL
allow-xhr-directive = *WS "allow-xhr" *WS ":" *WS allow-xhr-value EOL
disallow-fetch-from-directive = *WS "disallow-fetch-from" *WS ":" *WS url-pattern-list EOL
require-human-session-directive = *WS "require-human-initiated-session" *WS ":" *WS boolean EOL
session-validation-directive = *WS "session-validation" *WS ":" *WS session-validation-value EOL
session-ttl-directive = *WS "session-ttl" *WS ":" *WS duration EOL

; --- Directive value syntax (Core) ------------------------

url-pattern     = 1*( VCHAR / UTF8-char-noctl )
host-pattern    = 1*( ALPHA / DIGIT / "-" / "." / UTF8-char-noctl )
method-list     = method *( *WS "," *WS method )
method          = "GET" / "HEAD" / "POST" / "PUT" /
                  "DELETE" / "PATCH" / "OPTIONS" /
                  "TRACE" / "CONNECT"
purpose-list    = purpose-token *( *WS "," *WS purpose-token )
purpose-token   = 1*VCHAR   ; placeholder for future vocabulary
product-token   = identifier / "*"
identifier      = 1*( %x2D / %x41-5A / %x5F / %x61-7A )

; --- Directive value syntax (Extension) -------------------

rate-spec       = count "/" time-unit
count           = 1*DIGIT
time-unit       = "second" / "minute" / "hour" / "day"

automation-list = [ automation-token *( *WS "," *WS automation-token ) ]
                  ; the list may be empty (no automation technologies allowed)
automation-token= 1*VCHAR ; e.g., "cdp", "headless", "selenium"

api-automation-value = "none" / "with-key-only" / "open"

allow-xhr-value = "none" / "read-only" / "open"

url-pattern-list= url-pattern *( *WS "," *WS url-pattern )

boolean         = "true" / "false"

session-validation-value = "cookie-based" / "token-based" / "oauth" / "none"

duration        = 1*DIGIT time-unit-char
time-unit-char  = "s" / "m" / "h" / "d"  ; s=second, m=minute,
                                         ; h=hour, d=day

; --- Lexical primitives (from CORE-SPEC) ------------------

comment         = "#" *( UTF8-char-noctl / WS / "#" )

emptyline       = *WS [comment] EOL
EOL             = *WS [comment] NL
NL              = CRLF / LF / CR
CRLF            = CR LF
CR              = %x0D
LF              = %x0A
WS              = SP / HTAB
SP              = %x20
HTAB            = %x09

; --- Core ABNF terminals (RFC 5234) -----------------------

ALPHA           = %x41-5A / %x61-7A
DIGIT           = %x30-39
VCHAR           = %x21-7E

; --- UTF-8 (derived from RFC 3629) ------------------------

UTF8-char-noctl = UTF8-1-noctl / UTF8-2 / UTF8-3 / UTF8-4
UTF8-1-noctl    = %x21 / %x22 / %x24-7F
UTF8-2          = %xC2-DF UTF8-tail
UTF8-3          = %xE0 %xA0-BF UTF8-tail
                / %xE1-EC UTF8-tail-2
                / %xED %x80-9F UTF8-tail
                / %xEE-EF UTF8-tail-2
UTF8-4          = %xF0 %x90-BF UTF8-tail-2
                / %xF1-F3 UTF8-tail-3
                / %xF4 %x80-8F UTF8-tail-2
UTF8-tail       = %x80-BF
UTF8-tail-2     = UTF8-tail UTF8-tail
UTF8-tail-3     = UTF8-tail UTF8-tail UTF8-tail


# Automation preferences for example.com
# Version: 2.0 (incorporating extensions)
# Last updated: 2025-04-20

# Group 1: Applies to the entire site for all user agents
user-agent: *
host: example.com
scope: /
allowed-methods: GET, HEAD
allowed-purposes: PLACEHOLDER_PURPOSE1, PLACEHOLDER_PURPOSE2

# Extended directives for Group 1
request-limit: 60/minute
concurrent-limit: 5
# Empty - forbids all automation technologies
allowed-automations:
api-automation: with-key-only
require-human-initiated-session: true
session-validation: cookie-based
session-ttl: 1h


# Group 2: Specific preferences for the /admin/ path for ExampleBot
user-agent: ExampleBot
host: example.com
scope: /admin/
allowed-methods: GET
allowed-purposes: PLACEHOLDER_PURPOSE1

# Extended directives for admin path (Group 2)
request-limit: 10/minute
concurrent-limit: 2
require-human-initiated-session: true
session-validation: token-based
session-ttl: 30m


# Group 3: Default for /admin/ for other user agents (less specific than Group 2)
user-agent: *
host: example.com
scope: /admin/
allowed-methods: GET

Internet-Draft	aipref-autoctl-ext	April 2025
Peiyuan	Expires 22 October 2025	[Page]

Protocol Extension for Automation Control

Abstract

About This Document

Status of This Memo

Copyright Notice

Table of Contents

1. Introduction

1.1. Relationship to Core Specification

2. Conventions and Terminology

3. Extended Protocol Specification

3.1. Rate Limiting

3.2. Automation Technology Restrictions

3.2.1. Protocol Tokens

3.2.2. Runtime Tokens

3.3. API and XHR Permissions

3.4. Session Requirements

3.5. HTML Asset Annotation

4. Formal Syntax

5. Backward Compatibility

6. Implementation and Enforcement

7. Security Considerations

8. IANA Considerations

9. References

9.1. Normative References

9.2. Informative References

Sample Extended automation-preferences.txt File

Author's Address