#!/usr/bin/python3

import os, sys, locale, datetime, hashlib, hmac, base64, re, configparser
import http.client, os.path, xml.etree.ElementTree, collections

def s3request(accesskey, secretkey, httpverb, host, filename, body=None,
              host_has_bucket=False, ssl=True, port=None, headers=None,
              content_type=""):
    """Send one request signed with an "AWS accesskey:signature" header.

    Parameters:
      accesskey, secretkey  credentials (str); secretkey is the HMAC-SHA1 key
      httpverb              HTTP method, e.g. "GET" or "PUT"
      host                  server name; with host_has_bucket its first label
                            is used as the bucket name when signing
      filename              request path, optionally followed by a query string
      body                  payload as bytes, or None for a request without one
      host_has_bucket       True if the bucket name is part of the host name
      ssl                   True for HTTPS, False for plain HTTP
      port                  TCP port passed to http.client (None = default)
      headers               additional request headers; the mapping passed in
                            is not modified
      content_type          value sent (and signed) as Content-Type

    Returns the http.client.HTTPResponse of the request.
    """
    # Work on a private copy: neither a shared default nor the caller's dict
    # (which may be reused for a follow-up request) must pick up the Date,
    # Content-MD5 and Authorization values generated below.
    headers = dict(headers) if headers else {}

    if host_has_bucket:
        resource = "/%s%s" % (host.split(".", 1)[0], filename)
    else:
        resource = filename

    # These query parameters are left out of the signed resource: cut the
    # resource at the first one that occurs.
    cuts = [pos for pos in map(resource.find,
                               ("?object-lock", "?marker=", "&key-marker=",
                                "&version-id-marker="))
            if pos > -1]
    if cuts:
        resource = resource[:min(cuts)]

    # x-amz-* headers are signed as "lowercased-name:value" lines, ordered by
    # the lowercased name (not by whatever case the caller used).
    amz_headers = sorted(((name.lower(), value.strip())
                          for name, value in headers.items()
                          if name.lower().startswith("x-amz-")),
                         key=lambda pair: pair[0])
    headers_to_sign = "".join("%s:%s\n" % pair for pair in amz_headers)

    # strftime() must produce English day and month names.
    locale.setlocale(locale.LC_TIME, "C")
    now = datetime.datetime.now(datetime.timezone.utc)
    headers["Date"] = now.strftime("%a, %d %b %Y %H:%M:%S GMT")
    headers["Content-Type"] = content_type

    if body is None:
        headers["Content-MD5"] = ""
    else:
        digest = hashlib.md5(body).digest()
        headers["Content-MD5"] = base64.b64encode(digest).decode()

    string_to_sign = "%s\n%s\n%s\n%s\n%s%s" % (httpverb,
                                               headers["Content-MD5"],
                                               headers["Content-Type"],
                                               headers["Date"],
                                               headers_to_sign,
                                               resource)
    sha1_hmac = hmac.new(secretkey.encode(), string_to_sign.encode(),
                         hashlib.sha1)
    signature = base64.b64encode(sha1_hmac.digest()).decode()
    headers["Authorization"] = "AWS %s:%s" % (accesskey, signature)

    if ssl:
        conn = http.client.HTTPSConnection(host, port)
    else:
        conn = http.client.HTTPConnection(host, port)

    # The request itself uses the path exactly as given, query string
    # included; only the signature works on the shortened resource.
    conn.request(httpverb, filename, body, headers)

    return conn.getresponse()

def usage():
    """Leave the program with the usage text as the exit message."""
    text = """s3file.py -- manipulate buckets and objects on S3 storages

Usage:
  s3file.py [-e] [-s] [-p<port>] [-h<header:value> ...] [-t<content-type>]
            [-t] [-i<inifile>] [-v]
            <GET|HEAD|PUT|DELETE> <host> /bucket/path/to/file [<local file>]
  s3file.py [-e] [-s] [-p<port>] [-h<header:value> ...] [-t<content-type>]
            [-t] [-i<inifile>] [-v]
            -b <GET|HEAD|PUT|DELETE> <bucket.host> /path/to/file [<local file>]

  s3file.py [-e] [-s] [-p<port>] [-h<header:value> ...] [-t<content-type>]
            [-t] [-i<inifile>] [-v] <COPY|MOVE> <host>
            /srcbucket/srcpath/srcfile /dstbucket/dstpath/dstfile
  s3file.py [-e] [-s] [-p<port>] [-h<header:value> ...] [-t<content-type>]
            [-t] [-i<inifile>] [-v] -b <COPY|MOVE> <bucket.host>
            /srcpath/srcfile /dstpath/dstfile

  s3file.py [-e] [-s] [-p<port>] [-h<header:value> ...] [-t<content-type>]
            [-t] [-i<inifile>] [-v] <shortcut> <host> /bucket/
  s3file.py [-e] [-s] [-p<port>] [-h<header:value> ...] [-t<content-type>]
            [-t] [-i<inifile>] [-v] -b <shortcut> <bucket.host> /

  s3file.py --version
  s3file.py --examples

Parameters:
  -e                Print the HTTP ETag header returned by the server
  -s                Use HTTP instead of HTTPS
  -p<port>          Connect to <port> instead of 443 or 80
  -h<header:value>  Add an additional HTTP header to the request; maybe given
                    multiple times
  -t<content_type>  Set the content type for a PUT request
  -t                Show the content type returned by GET or HEAD
  -i<inifile>       Read accesskey and secretkey from the [s3file] section of
                    the given ini file
  -v                Show all HTTP headers returned by the request
  -b                Hostname is prefixed by the name of the bucket
  <shortcut>        One of:
                    CREATE-BUCKET
                    CREATE-LOCKED-BUCKET
                    GET-LOCKED-BUCKET
                    COMPLIANCE-YEARS-<years>
                    COMPLIANCE-DAYS-<days>
                    GOVERNANCE-YEARS-<years>
                    GOVERNANCE-DAYS-<days>
                    ENABLE-VERSIONING
                    GET-VERSIONING
                    DISABLE-VERSIONING
                    GET-LOGGING
                    LS
                    LS-<lsflag>
  <lsflag>          Any combination of:
                    L  long output
                    A  show object versions
                    H  human readable filesize
                    N  numeric user id
  --version         Display version number
  --examples        Show a tutorial
"""
    # Same effect as sys.exit(text): SystemExit carries the message.
    raise SystemExit(text)

def examples():
    """Leave the program with the tutorial text as the exit message."""
    # Compared to earlier revisions of this text, example 7 now uses PUT (it
    # uploads stdin) and the XML declarations in examples 13 and 14 are
    # well-formed.
    sys.exit("""Examples:

1.  Create a bucket:
    $ export S3ACCESSKEY="someuser"
    $ export S3SECRETKEY="somekey"
    $ echo -n "" | s3file.py PUT s3.somehost.invalid /new-bucket
  Or:
    $ s3file.py CREATE-BUCKET s3.somehost.invalid /new-bucket

2.  Create a bucket, but read credentials from an ini file:
    $ echo '[s3file]
accesskey = someuser
secretkey = somekey' > s3file.ini
    $ echo -n "" | s3file.py -is3file.ini PUT s3.somehost.invalid /new-bucket
  Or:
    $ s3file.py -is3file.ini CREATE-BUCKET s3.somehost.invalid /new-bucket

3.  Delete a bucket:
    $ s3file.py -is3file.ini DELETE s3.somehost.invalid /my-bucket

4.  Upload a local file named bar.txt and save it as foo.txt:
    $ s3file.py -is3file.ini PUT s3.somehost.invalid /my-bucket/foo.txt bar.txt

5.  Delete a file:
    $ s3file.py -is3file.ini DELETE s3.somehost.invalid /my-bucket/foo.txt

6.  Write the content of a remote file to stdout:
    $ s3file.py -is3file.ini GET s3.somehost.invalid /my-bucket/readme.txt

7.  Read from stdin and save it as remote file:
    $ s3file.py -is3file.ini PUT s3.somehost.invalid /my-bucket/readme.txt
...some content...
<Ctrl+D>

8.  Copy a file, possibly from one bucket to another:
    $ s3file.py -is3file.ini GET s3.somehost.invalid /my-bucket/readme.txt | \\
      s3file.py -iconfig2.ini PUT s3.otherhost.invalid /bucket2/cool.txt
  Or:
    $ s3file.py -is3file.ini COPY s3.somehost.invalid /my-bucket/readme.txt \\
      /bucket2/cool.txt

9.  Move a file, possibly from one bucket to another:
    $ s3file.py -is3file.ini GET s3.somehost.invalid /my-bucket/src.txt | \\
      s3file.py -iconfig2.ini PUT s3.otherhost.invalid /bucket2/dst.txt
    $ s3file.py -is3file.ini DELETE s3.somehost.invalid /my-bucket/src.txt
  Or:
    $ s3file.py -is3file.ini MOVE s3.somehost.invalid /my-bucket/src.txt \\
      /bucket2/dst.txt

10. Download a remote file and save it locally, but assume the first part of
    the hostname is the name of the bucket: 
    $ s3file.py -is3file.ini GET my-bucket.s3.somehost.invalid /read.me \\
      ohey.txt

11. Create a bucket with object locking:
    $ echo -n "" | s3file.py -is3file.ini \\
      -hx-amz-bucket-object-lock-enabled:True PUT s3.somehost.invalid \\
      /newbucket
  Or:
    $ s3file.py -is3file.ini CREATE-LOCKED-BUCKET s3.somehost.invalid \\
      /newbucket

12. Show whether a bucket has object locking enabled (HTTP error 404 means
    object locking is disabled):
    $ s3file.py -is3file.ini GET s3.somehost.invalid /my-bucket/?object-lock
  Or:
    $ s3file.py -is3file.ini GET-LOCKED-BUCKET s3.somehost.invalid /my-bucket/

13. Enable object locking in compliance mode for a bucket and set the default
    retention to 100 days:
    $ echo '<?xml version="1.0"?>
<ObjectLockConfiguration>
<ObjectLockEnabled>Enabled</ObjectLockEnabled>
<Rule>
<DefaultRetention>
<Mode>COMPLIANCE</Mode>
<Days>100</Days>
</DefaultRetention>
</Rule>
</ObjectLockConfiguration>' | \\
      s3file.py -is3file.ini PUT s3.somehost.invalid /my-bucket/?object-lock
  Or:
    $ s3file.py -is3file.ini COMPLIANCE-DAYS-100 s3.somehost.invalid /my-bucket

14. Enable object locking in governance mode for a bucket and set the default
    retention to 2 years:
    $ echo '<?xml version="1.0"?>
<ObjectLockConfiguration>
<ObjectLockEnabled>Enabled</ObjectLockEnabled>
<Rule>
<DefaultRetention>
<Mode>GOVERNANCE</Mode>
<Years>2</Years>
</DefaultRetention>
</Rule>
</ObjectLockConfiguration>' | \\
      s3file.py -is3file.ini PUT s3.somehost.invalid /my-bucket/?object-lock
  Or:
    $ s3file.py -is3file.ini GOVERNANCE-YEARS-2 s3.somehost.invalid /my-bucket/

15. Enable object versioning for a bucket:
    $ echo '<?xml version="1.0"?>
<VersioningConfiguration>
<Status>Enabled</Status>
<MfaDelete>Disabled</MfaDelete>
</VersioningConfiguration>' | \\
      s3file.py -is3file.ini PUT s3.somehost.invalid /my-bucket/?versioning
  Or:
    $ s3file.py -is3file.ini ENABLE-VERSIONING s3.somehost.invalid /my-bucket/

16. Show whether a bucket has object versioning enabled:
    $ s3file.py -is3file.ini GET s3.somehost.invalid /my-bucket/?versioning
  Or:
    $ s3file.py -is3file.ini GET-VERSIONING s3.somehost.invalid /my-bucket/

17. List all versioned objects:
    $ s3file.py -is3file.ini GET s3.somehost.invalid /my-bucket/?versions
  Or:
    $ s3file.py -is3file.ini LS-A s3.somehost.invalid /my-bucket/

18. Disable object versioning for a bucket:
    $ echo '<?xml version="1.0"?>
<VersioningConfiguration>
<Status>Disabled</Status>
<MfaDelete>Disabled</MfaDelete>
</VersioningConfiguration>' | \\
      s3file.py -is3file.ini PUT s3.somehost.invalid /my-bucket/?versioning
  Or:
    $ s3file.py -is3file.ini DISABLE-VERSIONING s3.somehost.invalid /my-bucket/

19. Show whether a bucket has access logging enabled:
    $ s3file.py -is3file.ini GET s3.somehost.invalid /my-bucket/?logging
  Or:
    $ s3file.py -is3file.ini GET-LOGGING s3.somehost.invalid /my-bucket/

20. List all objects:
    $ s3file.py -is3file.ini GET s3.somehost.invalid /my-bucket/
  Or:
    $ s3file.py -is3file.ini LS s3.somehost.invalid /my-bucket/
  Or:
    $ s3file.py -is3file.ini LS-L s3.somehost.invalid /my-bucket/
  Or:
    $ s3file.py -is3file.ini LS-LH s3.somehost.invalid /my-bucket/
  Or:
    $ s3file.py -is3file.ini LS-LHN s3.somehost.invalid /my-bucket/
  Or:
    $ s3file.py -is3file.ini LS-LN s3.somehost.invalid /my-bucket/
""")

def matches(pattern, string, flags=0):
    """Run re.match() and remember its result in matches.matches.

    The result (a match object or None) is returned as well, so the call can
    be used as a condition while the groups stay reachable afterwards.
    """
    found = re.match(pattern, string, flags)
    setattr(matches, "matches", found)
    return found

def bytes_pretty(filesize):
    """Format a byte count with a binary unit suffix (B, K, M, G or T).

    Values below 1024 are printed as they are with a "B" suffix; larger ones
    are divided by 1024 per unit and shown with two decimals. Everything
    from the terabyte range upwards stays in "T".
    """
    if filesize < 1024:
        return f"{filesize}B"

    scaled = filesize
    unit = ""
    for unit in "KMGT":
        scaled = scaled / 1024
        if scaled < 1024:
            return f"{scaled:.2f}{unit}"
    # Ran out of units: keep the largest one.
    return f"{scaled:.2f}{unit}"

class File:
    # One entry of a bucket listing as collected by LsParser. The class
    # attributes are the defaults for fields missing in the XML.
    name = ""          # text of <Key>
    modified = ""      # text of <LastModified>
    size = ""          # text of <Size>, possibly reformatted for humans
    user = ""          # owner <DisplayName> or <ID>
    deleted = False    # True for <DeleteMarker> entries
    latest = True      # False only if <IsLatest> says "false"

class LsParser:
    """Collect the XML of a bucket listing and print it in ls fashion.

    An instance is used in place of an output file: the response body is
    handed over chunk by chunk with write(), print() then prints the entries
    of that response, and next() tells whether (and how) to fetch the
    following page.

    Parameters:
      filename     request path of the listing, used to build follow-ups
      longflag     print flags, owner, size and date in addition to the name
      humanflag    print sizes via bytes_pretty()
      numericflag  print the owner's ID instead of its display name
    """

    def __init__(self, filename, longflag, humanflag, numericflag):
        self.filename = filename
        self.xmlparser = xml.etree.ElementTree.XMLPullParser(("start", "end",))
        self.longflag = longflag
        self.humanflag = humanflag
        self.numericflag = numericflag
        self.files = []
        self.longest_user = 0
        self.longest_size = 0
        self.longest_modified = 0
        self.longest_name = 0
        self.marker = ""
        self.keymarker = ""
        self.versionidmarker = ""

    def write(self, barr):
        """Feed another chunk of the response body to the XML parser."""
        self.xmlparser.feed(barr)

    def print(self):
        """Print the entries fed since the last call and keep the markers.

        Afterwards the XML parser is replaced by a fresh one and the list of
        entries is emptied, so the next page is not mixed up with this one.
        """
        in_listbucketresult = False
        in_contents = False
        in_owner = False
        entry = None

        for event, elem in self.xmlparser.read_events():
            tag = elem.tag
            if event == "start":
                if tag.endswith("Owner") and in_contents:
                    in_owner = True
                elif (tag.endswith(("Contents", "Version")) and
                      in_listbucketresult):
                    in_contents = True
                    entry = File()
                    entry.deleted = False
                elif tag.endswith("DeleteMarker") and in_listbucketresult:
                    in_contents = True
                    entry = File()
                    entry.deleted = True
                elif tag.endswith(("ListBucketResult", "ListVersionsResult")):
                    in_listbucketresult = True
            elif event == "end":
                # Empty elements have no text at all; treat them as "".
                text = elem.text or ""
                if (tag.endswith("DisplayName") and in_owner and
                        not self.numericflag):
                    entry.user = text
                    self.longest_user = max(self.longest_user, len(text))
                elif tag.endswith("ID") and in_owner and self.numericflag:
                    entry.user = text
                    self.longest_user = max(self.longest_user, len(text))
                elif tag.endswith("Size") and in_contents:
                    if self.humanflag and text.isdecimal():
                        entry.size = bytes_pretty(int(text))
                    else:
                        entry.size = text
                    self.longest_size = max(self.longest_size,
                                            len(entry.size))
                elif tag.endswith("LastModified") and in_contents:
                    entry.modified = text
                    self.longest_modified = max(self.longest_modified,
                                                len(text))
                elif tag.endswith("Key") and in_contents:
                    entry.name = text
                    self.longest_name = max(self.longest_name, len(text))
                elif tag.endswith("IsLatest") and in_contents:
                    entry.latest = text.lower() != "false"
                elif (tag.endswith("NextMarker") and in_listbucketresult
                      and text):
                    self.marker = text
                elif (tag.endswith("NextKeyMarker") and in_listbucketresult
                      and text):
                    self.keymarker = text
                elif (tag.endswith("NextVersionIdMarker") and
                      in_listbucketresult and text):
                    self.versionidmarker = text
                elif tag.endswith("Owner") and in_owner:
                    in_owner = False
                elif (tag.endswith(("Contents", "Version", "DeleteMarker"))
                      and in_contents):
                    in_contents = False
                    self.files.append(entry)
                elif (tag.endswith(("ListBucketResult",
                                    "ListVersionsResult")) and
                      in_listbucketresult):
                    in_listbucketresult = False

        # The next response is a new XML document and needs a new parser.
        self.xmlparser = xml.etree.ElementTree.XMLPullParser(("start", "end",))

        if self.longflag:
            for entry in self.files:
                print(f"{'d' if entry.deleted else '-'}"
                      f"{'l' if entry.latest else '-'} "
                      f"{entry.user.rjust(self.longest_user)} "
                      f"{entry.size.rjust(self.longest_size)} "
                      f"{entry.modified.rjust(self.longest_modified)} "
                      f"{entry.name.rjust(self.longest_name)}")
        else:
            for entry in self.files:
                print(entry.name)

        # Printed entries are done with; keeping them would print them again
        # together with every following page.
        self.files = []

    def next(self):
        """Return the command tuple for the next page, or () if there is none.

        The tuple is (http verb, headers, path with query, expected status).
        The stored markers are consumed by the call.
        """
        # Markers are object keys and may hold characters that are not
        # allowed unescaped in a query string.
        from urllib.parse import quote

        if self.marker:
            cmd = "%s?marker=%s" % (self.filename,
                                    quote(self.marker, safe=""))
            self.marker = ""
            return ("GET", {}, cmd, 200)
        elif self.keymarker and self.versionidmarker:
            cmd = "%s?versions&key-marker=%s&version-id-marker=%s" % (
                self.filename, quote(self.keymarker, safe=""),
                quote(self.versionidmarker, safe=""))
            self.keymarker, self.versionidmarker = "", ""
            return ("GET", {}, cmd, 200)
        else:
            return ()


# Defaults for everything that can be changed by a command line option.
show_etag = show_content_type = show_all_headers = False
host_has_bucket = False
ssl = True
port = None
inifile = None
headers = {}
content_type = ""

# Consume the leading options; arg_pos ends up on the first argument that
# does not start with "-".
arg_pos = 1
while arg_pos < len(sys.argv) and sys.argv[arg_pos].startswith("-"):
    option = sys.argv[arg_pos]
    if option == "-b":
        host_has_bucket = True
    elif option == "-s":
        ssl = False
    elif option == "-e":
        show_etag = True
    elif re.match(r"-p\d+$", option):
        port = int(option[2:])
    elif re.match(r"-h[^:]+:.+$", option):
        name, value = option[2:].split(":", 1)
        headers[name] = value
    elif re.match(r"-t.+$", option):
        # -t with a value sets the content type of the request ...
        content_type = option[2:]
    elif option == "-t":
        # ... while a bare -t asks for the one of the response.
        show_content_type = True
    elif option == "--examples":
        examples()
    elif re.match(r"-i.+$", option):
        inifile = os.path.expanduser(option[2:])
    elif option == "-v":
        show_all_headers = True
    elif option == "--version":
        sys.exit("20220604")
    else:
        usage()
    arg_pos += 1

# After the options come verb, host and remote path, optionally followed by
# one more argument (a local file, or the destination of COPY/MOVE).
positional = sys.argv[arg_pos:]
if not 3 <= len(positional) <= 4:
    usage()

httpverb, host, filename = positional[:3]
local_file = positional[3] if len(positional) == 4 else None
arg_pos += 3

# Credentials come from the ini file if one was given, otherwise from the
# environment.
if inifile is not None:
    config = configparser.ConfigParser()
    config.read(inifile)

    if "s3file" not in config:
        sys.exit("no s3file section in %s" % inifile)
    section = config["s3file"]
    for option_name in ("accesskey", "secretkey"):
        if option_name not in section:
            sys.exit("no %s in s3file section of %s" % (option_name, inifile))

    accesskey = section["accesskey"]
    secretkey = section["secretkey"]
else:
    accesskey = os.environ.get("S3ACCESSKEY")
    if accesskey is None:
        sys.exit("no S3ACCESSKEY environment variable")

    secretkey = os.environ.get("S3SECRETKEY")
    if secretkey is None:
        sys.exit("no S3SECRETKEY environment variable")

# Payload of the request(s); stays None for requests without one.
body = None
# Default sink for response bodies: binary stdout, with no-op print()/next()
# so that it offers the same interface as LsParser.
output = os.fdopen(sys.stdout.fileno(), "wb", closefd=False)
output.print = lambda: None
output.next = lambda: ()
# Queue of (http verb, headers, path, expected status) tuples.
cmds = collections.deque()

if httpverb == "GET":
    cmds.append((httpverb, headers, filename, 200))
elif httpverb == "HEAD":
    if local_file:
        sys.exit("HEAD and local file are mutual exclusive")
    cmds.append((httpverb, headers, filename, 200))
elif httpverb == "PUT":
    if local_file is None:
        body = sys.stdin.buffer.read()
    else:
        with open(local_file, "rb") as upload:
            body = upload.read()
    cmds.append((httpverb, headers, filename, 200))
elif httpverb == "DELETE":
    if local_file:
        sys.exit("DELETE and local file are mutual exclusive")
    cmds.append((httpverb, headers, filename, 204))
elif httpverb == "CREATE-BUCKET":
    if local_file:
        sys.exit("CREATE-BUCKET and local file are mutual exclusive")
    body = b""
    cmds.append(("PUT", headers, filename, 200))
elif httpverb == "CREATE-LOCKED-BUCKET":
    if local_file:
        sys.exit("CREATE-LOCKED-BUCKET and local file are mutual exclusive")
    headers["x-amz-bucket-object-lock-enabled"] = "True"
    body = b""
    cmds.append(("PUT", headers, filename, 200))
elif httpverb == "GET-LOCKED-BUCKET":
    if local_file:
        sys.exit("GET-LOCKED-BUCKET and local file are mutual exclusive")
    cmds.append(("GET", headers, "%s?object-lock" % filename, 200))
elif matches(r"(COMPLIANCE|GOVERNANCE)-(YEARS|DAYS)-(\d+)$", httpverb):
    if local_file:
        sys.exit("%s and local file are mutual exclusive" % httpverb)
    mode, timespec, duration = matches.matches.group(1, 2, 3)
    timespec = timespec.title()
    # Not named "xml": that would hide the xml package LsParser relies on.
    lock_config = """<?xml version="1.0"?>
<ObjectLockConfiguration>
<ObjectLockEnabled>Enabled</ObjectLockEnabled>
<Rule>
<DefaultRetention>
<Mode>%s</Mode>
<%s>%s</%s>
</DefaultRetention>
</Rule>
</ObjectLockConfiguration>""" % (mode, timespec, duration, timespec)
    body = lock_config.encode()
    cmds.append(("PUT", headers, "%s?object-lock" % filename, 200))
elif matches(r"(ENABLE|DISABLE)-VERSIONING$", httpverb):
    if local_file:
        sys.exit("%s and local file are mutual exclusive" % httpverb)
    # NOTE(review): DISABLE-VERSIONING sends <Status>Disabled</Status>; the
    # AWS API documents "Suspended" for this - confirm what the target
    # storage expects.
    versioning_config = """<?xml version="1.0"?>
<VersioningConfiguration>
<Status>%sd</Status>
<MfaDelete>Disabled</MfaDelete>
</VersioningConfiguration>""" % matches.matches.group(1).title()
    body = versioning_config.encode()
    cmds.append(("PUT", headers, "%s?versioning" % filename, 200))
elif httpverb == "GET-VERSIONING":
    if local_file:
        sys.exit("GET-VERSIONING and local file are mutual exclusive")
    cmds.append(("GET", headers, "%s?versioning" % filename, 200))
elif httpverb == "GET-LOGGING":
    if local_file:
        sys.exit("GET-LOGGING and local file are mutual exclusive")
    cmds.append(("GET", headers, "%s?logging" % filename, 200))
elif matches(r"LS(-[LAHN]+)?$", httpverb):
    if local_file:
        sys.exit("%s and local file are mutual exclusive" % httpverb)
    flags = matches.matches.group(1) or ""
    output = LsParser(filename, "L" in flags, "H" in flags, "N" in flags)
    cmds.append(("GET", headers,
                 "%s%s" % (filename, "?versions" if "A" in flags else ""),
                 200))
elif httpverb in ("COPY", "MOVE"):
    # For these two the fourth argument is the remote destination.
    if not local_file:
        sys.exit("%s needs a remote destination filename" % httpverb)
    # The copy source has to name the bucket as well; with -b the bucket is
    # only part of the host name and must be put in front of the path.
    if host_has_bucket:
        copy_source = "/%s%s" % (host.split(".", 1)[0], filename)
    else:
        copy_source = filename
    copyheaders = headers.copy()
    copyheaders["x-amz-copy-source"] = copy_source
    cmds.append(("PUT", copyheaders, local_file, 200))
    if httpverb == "MOVE":
        # A move is a copy followed by deleting the source.
        cmds.append(("DELETE", headers, filename, 204))
else:
    usage()

# Response bodies are copied in pieces of this size.
CHUNK_LENGTH = 10 * 1024 * 1024

# Work through the queued requests; a finished request may queue a follow-up
# (the next page of a listing).
while cmds:
    httpverb, headers, filename, expected_response = cmds.popleft()
    response = s3request(accesskey, secretkey, httpverb, host, filename, body,
                         host_has_bucket, ssl, port, headers, content_type)
    if response.status != expected_response:
        sys.exit(str(response.status) + " " + response.reason)

    content_length = response.getheader("Content-Length")
    if not content_length:
        sys.stderr.write("warning: no Content-Length\n")

    if show_etag:
        etag = response.getheader("ETag")
        if etag is None:
            sys.stderr.write("warning: no ETag header\n")
        else:
            sys.stderr.write("ETag: %s\n" % etag)

    if show_content_type:
        # Kept apart from content_type, which is the type sent with
        # (follow-up) requests.
        response_content_type = response.getheader("Content-Type")
        if response_content_type is None:
            sys.stderr.write("warning: no Content-Type header\n")
        else:
            sys.stderr.write("Content-Type: %s\n" % response_content_type)

    if show_all_headers:
        for k, v in response.getheaders():
            sys.stderr.write("%s: %s\n" % (k, v))

    # A GET with a local file writes there instead of to the default output.
    # The default output is left in place: its print()/next() are still
    # called below, and a plain file has neither.
    if httpverb == "GET" and local_file is not None:
        download = open(local_file, "wb")
        sink = download
    else:
        download = None
        sink = output

    read_bytes = 0
    try:
        while True:
            chunk = response.read(CHUNK_LENGTH)
            if not chunk:
                break
            sink.write(chunk)
            read_bytes += len(chunk)
    finally:
        if download is not None:
            download.close()

    # A HEAD response announces the length of the object but carries no
    # body, so there is nothing to compare for it.
    if (httpverb != "HEAD" and content_length and
            str(read_bytes) != content_length):
        sys.exit("expected %sB, got %i" % (content_length, read_bytes))

    output.print()

    cmd = output.next()
    if cmd:
        cmds.append(cmd)