From bbe2ba716798ada88da2bf65cbffa5e13b2a622c Mon Sep 17 00:00:00 2001
From: Erik Auerswald <auerswal@unix-ag.uni-kl.de>
Date: Sun, 3 May 2020 18:44:30 +0200
Subject: [PATCH] decorate: combined sort of IPv4 and IPv6 addresses

IPv4 and IPv6 addresses can be seen as IP addresses.  Logs from a
"dual-stack" application, e.g., a web server, may contain either an IPv4
or IPv6 address at a given position in each line.  Thus if one wants to
sort the log file on the IP address, both IPv4 and IPv6 addresses need
to be accepted as sort key and sorted consistently.  One approach is to
transform one address type into the other before sorting.  IPv6 supports
the transformation of IPv4 addresses into IPv6 addresses.

There are two common methods for accommodating IPv4 addresses in IPv6:
IPv4-Mapped addresses and the deprecated IPv4-Compatible addresses.
Both can be used to convert a given IPv4 address to an IPv6 address.
Both IPv4-Mapped and IPv4-Compatible IPv6 address ranges are reserved
by IANA and always represent IPv4 addresses in a dual stack enabled
application.  IPv4-Compatible addresses just add 96 leading zero bits to
the 32 bit IPv4 address to create a 128 bit IPv6 address.  This makes two
addresses ambiguous: the unspecified address (all-zero in both IPv4 and
IPv6), and the IPv6 localhost address ::1, which coincides with 0.0.0.1,
the first host address of "this" network in IPv4.  IPv4-Mapped addresses
avoid this ambiguity.
But since IPv4-Compatible IPv6 addresses can be seen as treating the IP
address (both version 4 and version 6) as a specific way to represent an
integer value I think it is useful to support this transformation as well.

This commit adds two conversion methods to decorate: ipv6v4map and
ipv6v4comp.  The conversion logically converts an IPv4 address to an
IPv6 address, but the code actually creates a textual representation of
a 128-bit integer from either an IPv4 or IPv6 address.

Functionality like this was requested for sort from GNU Coreutils:
  https://lists.gnu.org/archive/html/coreutils/2011-06/msg00078.html
  https://lists.gnu.org/r/bug-coreutils/2015-06/msg00039.html

It was rejected for sort from GNU Coreutils:
  https://www.gnu.org/software/coreutils/rejected_requests.html#sort
  https://lists.gnu.org/r/bug-coreutils/2015-06/msg00041.html

Thus it seems appropriate for decorate(1).

* NEWS: Mention new decorate feature.
* src/decorate-functions.c: Implement new conversions.
* tests/decorate-errors.pl, tests/decorate-sort-tests.pl,
  tests/decorate-tests.pl: Add tests for new conversions.
---
 NEWS                         |  7 +++++
 src/decorate-functions.c     | 61 +++++++++++++++++++++++++++++++-----
 tests/decorate-errors.pl     | 24 ++++++++++++++
 tests/decorate-sort-tests.pl | 42 +++++++++++++++++++++++++
 tests/decorate-tests.pl      | 44 ++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 7 deletions(-)

diff --git a/NEWS b/NEWS
index ee831ad..15b0bfa 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,12 @@
 * Noteworthy changes in release ?.? (?-?-?) []
 
+** New Features
+
+  Decorate now supports sorting IP addresses of both versions 4 and 6
+  together. IPv4 addresses are logically converted to IPv6 addresses,
+  either as IPv4-Mapped (ipv6v4map) or IPv4-Compatible (ipv6v4comp)
+  addresses.
+
 ** Bug Fixes
 
   Datamash now passes the -z/--zero-terminated flag to the sort(1) child
diff --git a/src/decorate-functions.c b/src/decorate-functions.c
index c50387f..6b28492 100644
--- a/src/decorate-functions.c
+++ b/src/decorate-functions.c
@@ -178,14 +178,61 @@ decorate_ipv6 (const char* in)
 }
 
 
+/* Print IN, an IPv4 or IPv6 address, as 32 hex digits; false if invalid.  */
+static bool
+decorate_ipv6_ipv4 (const char* in, uint32_t mapping)
+{
+  struct in_addr adr4;
+  struct in6_addr adr6;
+  int s4, s6;   /* inet_pton results: 1 parsed, 0 not parsable, -1 error */
+
+  s4 = inet_pton (AF_INET, in, &adr4);
+  s6 = inet_pton (AF_INET6, in, &adr6);
+
+  if (s4 < 0 && s6 < 0)
+    die (SORT_FAILURE, errno, _("inet_pton failed for AF_INET and AF_INET6"));
+  if (!(s4 > 0 || s6 > 0))
+    {
+      error (0, 0, _("invalid IP address %s"), quote (in));
+      return false;
+    }
+
+  if (s6 > 0)   /* not merely nonzero: on -1 adr6 was never filled in */
+    for (int i=0;i<16;++i)
+      printf ("%02X", adr6.s6_addr[i]);
+  else          /* IPv4: MAPPING is the upper 96 bits, address the low 32 */
+    printf ("%024X%08X", mapping, ntohl (adr4.s_addr));
+
+  return true;
+}
+
+
+bool
+decorate_ipv6_ipv4_mapped (const char* in)
+{
+  return decorate_ipv6_ipv4 (in, 0xFFFF);  /* IPv4 keyed as ::ffff:a.b.c.d */
+}
+
+
+bool
+decorate_ipv6_ipv4_compat (const char* in)
+{
+  return decorate_ipv6_ipv4 (in, 0);  /* IPv4 keyed as ::a.b.c.d (zero prefix) */
+}
+
+
 
 struct conversions_t builtin_conversions[] = {
-  { "as-is",    "copy as-is", decorate_as_is },     /* for debugging */
-  { "roman",    "roman numerals", decorate_roman_numerals },
-  { "strlen",   "length (in bytes) of the specified field", decorate_strlen },
-  { "ipv4",     "dotted-decimal IPv4 addresses", decorate_ipv4_dot_decimal },
-  { "ipv6",     "IPv6 addresses", decorate_ipv6 },
-  { "ipv4inet", "number-and-dots IPv4 addresses (incl. octal, hex values)",
+  { "as-is",      "copy as-is", decorate_as_is },     /* for debugging */
+  { "roman",      "roman numerals", decorate_roman_numerals },
+  { "strlen",     "length (in bytes) of the specified field", decorate_strlen },
+  { "ipv4",       "dotted-decimal IPv4 addresses", decorate_ipv4_dot_decimal },
+  { "ipv6",       "IPv6 addresses", decorate_ipv6 },
+  { "ipv4inet",   "number-and-dots IPv4 addresses (incl. octal, hex values)",
     decorate_ipv4_inet_addr },
-  { NULL,       NULL, 0 }
+  { "ipv6v4map",  "IPv6 and IPv4 (as IPv4-Mapped IPv6) addresses",
+    decorate_ipv6_ipv4_mapped},
+  { "ipv6v4comp", "IPv6 and IPv4 (as IPv4-Compatible IPv6) addresses",
+    decorate_ipv6_ipv4_compat},
+  { NULL,         NULL, 0 }
 };
diff --git a/tests/decorate-errors.pl b/tests/decorate-errors.pl
index c8f4202..9fb979b 100644
--- a/tests/decorate-errors.pl
+++ b/tests/decorate-errors.pl
@@ -154,6 +154,30 @@ my @Tests =
  ['c5', '--decorate -k1,1:ipv6' , {IN_PIPE=>"FOO\n"}, {OUT => " "}, {EXIT=>2},
   {ERR=>"$prog: invalid IPv6 address 'FOO'\n" .
         "$prog: conversion failed in line 1\n" }],
+ ['c6', '--decorate -k1,1:ipv6v4map', {IN_PIPE=>"FOO\n"}, {OUT => " "},
+  {EXIT=>2},
+  {ERR=>"$prog: invalid IP address 'FOO'\n" .
+        "$prog: conversion failed in line 1\n" }],
+ ['c7', '--decorate -k1,1:ipv6v4comp', {IN_PIPE=>"FOO\n"}, {OUT => " "},
+  {EXIT=>2},
+  {ERR=>"$prog: invalid IP address 'FOO'\n" .
+        "$prog: conversion failed in line 1\n" }],
+ ['c8', '--decorate -k1,1:ipv6v4map', {IN_PIPE=>"0\n"}, {OUT => " "},
+  {EXIT=>2},
+  {ERR=>"$prog: invalid IP address '0'\n" .
+        "$prog: conversion failed in line 1\n" }],
+ ['c9', '--decorate -k1,1:ipv6v4comp', {IN_PIPE=>"0\n"}, {OUT => " "},
+  {EXIT=>2},
+  {ERR=>"$prog: invalid IP address '0'\n" .
+        "$prog: conversion failed in line 1\n" }],
+ ['c10', '--decorate -k1,1:ipv6v4map', {IN_PIPE=>"\n"}, {OUT => " "},
+  {EXIT=>2},
+  {ERR=>"$prog: invalid IP address ''\n" .
+        "$prog: conversion failed in line 1\n" }],
+ ['c11', '--decorate -k1,1:ipv6v4comp', {IN_PIPE=>"\n"}, {OUT => " "},
+  {EXIT=>2},
+  {ERR=>"$prog: invalid IP address ''\n" .
+        "$prog: conversion failed in line 1\n" }],
 
 
   # on a different architecture, would printf(%Lg) print something else?
diff --git a/tests/decorate-sort-tests.pl b/tests/decorate-sort-tests.pl
index bada9f8..88b75ec 100644
--- a/tests/decorate-sort-tests.pl
+++ b/tests/decorate-sort-tests.pl
@@ -177,6 +177,45 @@ D   192.168.17.8
 M   192.168.43.1
 EOF
 
+my $in2=<<'EOF';
+203.0.113.47
+192.0.2.33
+203.0.113.0
+192.0.2.3
+0.0.0.0
+::ffff:192.0.2.42
+2001:db8:6:5:4:3:2:1
+2001:Db8::
+::192.0.2.41
+::1
+EOF
+
+my $out2_ipv6v4map=<<'EOF';
+::1
+::192.0.2.41
+0.0.0.0
+192.0.2.3
+192.0.2.33
+::ffff:192.0.2.42
+203.0.113.0
+203.0.113.47
+2001:Db8::
+2001:db8:6:5:4:3:2:1
+EOF
+
+my $out2_ipv6v4comp=<<'EOF';
+0.0.0.0
+::1
+192.0.2.3
+192.0.2.33
+::192.0.2.41
+203.0.113.0
+203.0.113.47
+::ffff:192.0.2.42
+2001:Db8::
+2001:db8:6:5:4:3:2:1
+EOF
+
 
 my @Tests =
 (
@@ -195,6 +234,9 @@ my @Tests =
  ['s11', '-k2n,2 -k1,1:roman' , {IN_PIPE=>$in1},
   {OUT => $out1_dec_k2n_roman}],
 
+ ['s12', '-k1,1:ipv6v4map',  {IN_PIPE=>$in2}, {OUT => $out2_ipv6v4map}],
+ ['s13', '-k1,1:ipv6v4comp', {IN_PIPE=>$in2}, {OUT => $out2_ipv6v4comp}],
+
 
  # Sort with header lines
  ['sh1', '-H -k2,2r:ipv4', {IN_PIPE=>$in1}, {OUT=>$out1_dec_ipv4_rev_header1}],
diff --git a/tests/decorate-tests.pl b/tests/decorate-tests.pl
index 5131bf1..87a4a95 100755
--- a/tests/decorate-tests.pl
+++ b/tests/decorate-tests.pl
@@ -222,6 +222,47 @@ C0A81164 C   192.168.17.100
 C0A8110A L   192.168.17.10
 EOF
 
+my $in4=<<'EOF';
+203.0.113.47
+192.0.2.33
+203.0.113.0
+192.0.2.3
+0.0.0.0
+::ffff:192.0.2.42
+2001:db8:6:5:4:3:2:1
+2001:Db8::
+::192.0.2.41
+::1
+::
+EOF
+
+my $out4_ipv6v4map=<<'EOF';
+00000000000000000000FFFFCB00712F 203.0.113.47
+00000000000000000000FFFFC0000221 192.0.2.33
+00000000000000000000FFFFCB007100 203.0.113.0
+00000000000000000000FFFFC0000203 192.0.2.3
+00000000000000000000FFFF00000000 0.0.0.0
+00000000000000000000FFFFC000022A ::ffff:192.0.2.42
+20010DB8000600050004000300020001 2001:db8:6:5:4:3:2:1
+20010DB8000000000000000000000000 2001:Db8::
+000000000000000000000000C0000229 ::192.0.2.41
+00000000000000000000000000000001 ::1
+00000000000000000000000000000000 ::
+EOF
+
+my $out4_ipv6v4comp=<<'EOF';
+000000000000000000000000CB00712F 203.0.113.47
+000000000000000000000000C0000221 192.0.2.33
+000000000000000000000000CB007100 203.0.113.0
+000000000000000000000000C0000203 192.0.2.3
+00000000000000000000000000000000 0.0.0.0
+00000000000000000000FFFFC000022A ::ffff:192.0.2.42
+20010DB8000600050004000300020001 2001:db8:6:5:4:3:2:1
+20010DB8000000000000000000000000 2001:Db8::
+000000000000000000000000C0000229 ::192.0.2.41
+00000000000000000000000000000001 ::1
+00000000000000000000000000000000 ::
+EOF
 
 my @Tests =
 (
@@ -242,6 +283,9 @@ my @Tests =
   {OUT=>$out1_strlen3}],
  ['d11', '--decorate -k1,1:strlen -k2.2,2:strlen' ,  {IN_PIPE=>$in1},
   {OUT=>$out1_strlen4}],
+ ['d12', '--decorate -k1:ipv6v4map', {IN_PIPE=>$in4}, {OUT=>$out4_ipv6v4map}],
+ ['d13', '--decorate -k1:ipv6v4comp', {IN_PIPE=>$in4},
+  {OUT=>$out4_ipv6v4comp}],
 
  ## basic undecoration
  ['u1', '--undecorate 1' , {IN_PIPE=>$out1_dec_roman}, {OUT => $in1}],
-- 
2.17.1

