diff --git a/OAS3.yml b/OAS3.yml index 16c65a6..501f221 100644 --- a/OAS3.yml +++ b/OAS3.yml @@ -200,6 +200,84 @@ paths: '404': $ref: '#/components/responses/NotFound' + /document/{id}/{type}: + parameters: + - in: path + name: id + required: true + schema: + type: string + description: Entity ID + - in: path + name: type + required: true + schema: + type: string + enum: [application, sepa] + description: Type of document to upload + - in: header + name: Authentication + schema: + type: string + description: Authentication token + post: + summary: Upload a PDF document for a member + description: Note that the entry must be updated with the URI obtained from this call + tags: + - document + requestBody: + description: The document + content: + 'application/pdf': + schema: + type: string + format: binary + responses: + '201': + description: File has been stored ("created") locally, returns the URI for downloading the file + content: + text/plain: + schema: + type: string + format: uri + '303': + description: The file is already in storage, returns the URI for downloading the file + content: + text/plain: + schema: + type: string + format: uri + '401': + $ref: '#/components/responses/AuthenticationRequired' + '403': + $ref: '#/components/responses/NotAllowed' + '405': + $ref: '#/components/responses/InvalidInput' + '500': + $ref: '#/components/responses/InternalError' + get: + summary: Get a PDF document for a member + tags: + - document + responses: + '200': + description: Returns PDF data + content: + 'application/pdf': + schema: + type: string + format: binary + '404': + $ref: '#/components/responses/NotFound' + '401': + $ref: '#/components/responses/AuthenticationRequired' + '403': + $ref: '#/components/responses/NotAllowed' + '405': + $ref: '#/components/responses/InvalidInput' + '500': + $ref: '#/components/responses/InternalError' + components: schemas: health: @@ -236,4 +314,3 @@ components: schema: type: string example: error message - diff --git a/README.md 
b/README.md index a6d6afb..2baa98e 100644 --- a/README.md +++ b/README.md @@ -9,4 +9,7 @@ Query and manipulate the Netz39 entities database. The service is configured via the following environment variables: * `PORT`: Service port. defaults to 8080 * `AUTH`: Authentication tokens, defaults to None. Example Configuration : `AUTH={"token_1": "user_1", "token_2": "user_2"}` - +* `GIT_ORIGIN`: URL for the origin Git repository, including the user name +* `GIT_PASSWORD`: The git password for the user encoded in the origin URL +* `GIT_PULL_INTV`: Time interval between automated pull operations (default: 30s) +* `GIT_WC_PATH`: Set a path for the working copy. Will create a temporary checkout if not provided. diff --git a/app.py b/app.py index 7bff5b8..691e2d4 100644 --- a/app.py +++ b/app.py @@ -12,15 +12,37 @@ import json import util from auth import AuthProvider +from gitmgr import GitManagerConfiguration, GitManager startup_timestamp = datetime.now() +class AuthenticatedHandler(tornado.web.RequestHandler, metaclass=ABCMeta): + # noinspection PyAttributeOutsideInit + def initialize(self, auth_provider=None): + self.auth_provider = auth_provider + + def prepare(self): + if self.auth_provider is None: + return + + # check authentication + auth_hdr = "Authentication" + if auth_hdr not in self.request.headers: + raise tornado.web.HTTPError(401, reason="authentication not provided") + + tk = self.request.headers[auth_hdr] + + if not self.auth_provider.validate_token(tk): + raise tornado.web.HTTPError(403, reason="invalid authentication token provided") + + class HealthHandler(tornado.web.RequestHandler, metaclass=ABCMeta): # noinspection PyAttributeOutsideInit - def initialize(self): + def initialize(self, sources=None): self.git_version = self._load_git_version() + self.sources = sources @staticmethod def _load_git_version(): @@ -52,6 +74,12 @@ class HealthHandler(tornado.web.RequestHandler, metaclass=ABCMeta): health['timestamp'] = 
isodate.datetime_isoformat(datetime.now()) health['uptime'] = isodate.duration_isoformat(datetime.now() - startup_timestamp) + if self.sources: + for s in self.sources: + h = s() + if h is not None: + health = {**health, **h} + self.set_header("Content-Type", "application/json") self.write(json.dumps(health, indent=4)) self.set_status(200) @@ -69,11 +97,38 @@ class Oas3Handler(tornado.web.RequestHandler, metaclass=ABCMeta): self.finish() -def make_app(_auth_provider=None): +class AllEntitiesHandler(AuthenticatedHandler, metaclass=ABCMeta): + # noinspection PyAttributeOutsideInit + def initialize(self, auth_provider=None): + super().initialize(auth_provider) + + def post(self): + pass + + def get(self): + pass + + +class SingleEntityHandler(AuthenticatedHandler, metaclass=ABCMeta): + # noinspection PyAttributeOutsideInit + def initialize(self, auth_provider=None): + super().initialize(auth_provider) + + def post(self, identifier): + pass + + def get(self, identifier): + pass + + +def make_app(_auth_provider=None, gitmgr=None): version_path = r"/v[0-9]" return tornado.web.Application([ - (version_path + r"/health", HealthHandler), + (version_path + r"/health", HealthHandler, + {"sources": [lambda: {"git-head": gitmgr.head_sha}] if gitmgr else None}), (version_path + r"/oas3", Oas3Handler), + (version_path + r"/entities", AllEntitiesHandler, {"auth_provider": _auth_provider}), + (version_path + r"/entity/{.*}", SingleEntityHandler, {"auth_provider": _auth_provider}), ]) @@ -83,10 +138,16 @@ def main(): # Setup auth_provider = AuthProvider.from_environment() - util.run_tornado_server(make_app(auth_provider), + gitcfg = GitManagerConfiguration.from_environment() + gitmgr = GitManager(configuration=gitcfg) + gitmgr.setup() + gitmgr.printout() + + util.run_tornado_server(make_app(auth_provider, gitmgr), server_port=port) # Teardown + gitmgr.teardown() print("Server stopped") diff --git a/gitmgr.py b/gitmgr.py new file mode 100644 index 0000000..43506e2 --- /dev/null +++ 
"""Git working-copy management for the entities service (``gitmgr.py``).

Provides :class:`GitManagerConfiguration` (settings, usually read from the
environment) and :class:`GitManager` (clone / reuse / pull of a working copy).

Requires GitPython (added to requirements.txt as ``GitPython==3.1.12``).
"""
import contextlib
import os
import shutil
import tempfile
import time

import git

from util import load_env


class GitManagerConfiguration:
    """Read-only settings for a :class:`GitManager`."""

    @classmethod
    def from_environment(cls):
        """Build a configuration from the ``GIT_*`` environment variables.

        Reads ``GIT_ORIGIN``, ``GIT_WC_PATH``, ``GIT_PASSWORD`` and
        ``GIT_PULL_INTV`` via ``load_env``, each with a default of ``None``.

        Raises:
            ValueError: if no origin is configured, or if the pull interval
                is not an integer.
        """
        origin = load_env("GIT_ORIGIN", None)
        wc_path = load_env("GIT_WC_PATH", None)
        git_pw = load_env("GIT_PASSWORD", None)
        pull_intv = load_env("GIT_PULL_INTV", None)

        return cls(origin=origin,
                   git_pw=git_pw,
                   wc_path=wc_path,
                   pull_intv=pull_intv)

    def __init__(self, origin, git_pw=None, wc_path=None, pull_intv=None):
        """Store the settings.

        Arguments:
            `origin` -- URL of the origin repository (mandatory, non-empty)
            `git_pw` -- password handed to git via GIT_ASKPASS, or None
            `wc_path` -- existing directory to use as working copy, or None
                         to let the manager create a temporary one
            `pull_intv` -- minimum number of seconds between two non-forced
                           pulls; anything accepted by ``int()``, default 30

        Raises:
            ValueError: if `origin` is empty or `pull_intv` is not an integer.
        """
        if not origin:
            raise ValueError("Git origin cannot be empty!")

        self._origin = origin
        self._git_pw = git_pw
        self._wc_path = wc_path
        self._pull_intv = 30 if pull_intv is None else int(pull_intv)

    @property
    def origin(self):
        """URL of the origin repository."""
        return self._origin

    @property
    def git_pw(self):
        """Configured git password, or None."""
        return self._git_pw

    @property
    def wc_path(self):
        """Externally configured working-copy directory, or None."""
        return self._wc_path

    @property
    def pull_intv(self):
        """Minimum number of seconds between two non-forced pulls."""
        return self._pull_intv


class GitManager:
    """Owns a git working copy cloned from (or matching) the configured origin."""

    def __init__(self, configuration):
        """Create a manager; no file system or git access happens until `setup()`.

        Raises:
            ValueError: if `configuration` is None.
        """
        if configuration is None:
            raise ValueError("GitManager must be initialized with a configuration!")

        self._configuration = configuration
        self._wc = None
        # Monotonic timestamp of the last pull attempt; None means "never".
        self._last_pull = None
        # Set by _init_repo(). Initialised here so that head_sha can be
        # queried safely before setup() has run.
        self.repo = None

    @property
    def configuration(self):
        """The configuration this manager was created with."""
        return self._configuration

    def _setup_wc(self):
        """Select the working-copy directory, creating a temporary one if needed.

        Raises:
            ValueError: if the selected path is not an existing directory.
        """
        if self._wc is not None:
            return

        _wc = self.configuration.wc_path

        if _wc is None:
            _wc = tempfile.mkdtemp(prefix='entities_git_')

        if not os.path.isdir(_wc):
            raise ValueError("Configured directory for the working copy does not exist!")

        self._wc = _wc

    def _teardown_wc(self):
        """Remove the working copy, unless its path was configured externally."""
        if self._wc is None:
            return

        if self.configuration.wc_path is not None:
            print("NOTE: Not tearing down externally configured working copy.")
            return

        shutil.rmtree(self._wc)

        self._wc = None

    def _assert_wc(self):
        """Assert working copy matches origin and is a valid repository.

        A failed assertion will throw exceptions and lead to service abort,
        as this error is not recoverable.

        Returns False if the WC path is an empty directory"""

        # Check if WC is empty
        if not os.listdir(self._wc):
            return False

        # Create a repository object
        # This fails if there is no valid repository
        repo = git.Repo(self._wc)

        # Assert that this is not a bare repo
        if repo.bare:
            raise ValueError("WC path points to a bare git repository!")

        origin = repo.remote('origin')
        if self.configuration.origin not in origin.urls:
            raise ValueError("Origin URL does not match!")

        # We're good here.
        return True

    def _askpass_script(self):
        """Return the source of a Python script that prints the git password.

        The password may contain any character, so it is not embedded
        literally: it is stored as a list of code points that the script
        turns back into a string and writes to stdout.

        Raises:
            ValueError: if no password is configured.
        """
        password = self.configuration.git_pw
        if password is None:
            raise ValueError("No git password configured!")

        code_points = [ord(c) for c in password]

        return ("#!/usr/bin/env python3\n"
                f"l = {code_points}\n"
                "p = [chr(c) for c in l]\n"
                "print(\"\".join(p))\n")

    @contextlib.contextmanager
    def _askpass_env(self):
        """Yield the environment variables git needs to obtain the password.

        Yields an empty dict when no password is configured. Otherwise a
        temporary executable GIT_ASKPASS helper exists for the duration of
        the ``with`` block and is removed afterwards.
        """
        if self.configuration.git_pw is None:
            yield {}
            return

        # delete=False: the file must be closed before git can execute it,
        # so it is removed explicitly in the finally clause instead.
        handle = tempfile.NamedTemporaryFile(mode='w', delete=False)
        try:
            with handle:
                handle.write(self._askpass_script())
            os.chmod(handle.name, 0o700)
            yield {'GIT_ASKPASS': handle.name}
        finally:
            os.unlink(handle.name)

    def _init_repo(self):
        """Open the existing working copy, or clone the origin into an empty one."""
        # Assert working copy is valid; False means cloning is necessary.
        if self._assert_wc():
            print("Reusing existing git working copy ...")
            self.repo = git.Repo(self._wc)
            return

        print("Cloning new git working copy ...")
        with self._askpass_env() as env:
            self.repo = git.Repo.clone_from(url=self.configuration.origin,
                                            to_path=self._wc,
                                            env=env)

    def setup(self):
        """Prepare the working copy, open/clone the repository and pull once."""
        self._setup_wc()
        self._init_repo()
        self.pull(force=True)

    def teardown(self):
        """Release the repository and remove a temporary working copy."""
        if self.repo is not None:
            # Do not keep a Repo around that may point to a deleted directory.
            self.repo.close()
            self.repo = None

        self._teardown_wc()

    def printout(self):
        """Print origin and working-copy path to stdout."""
        print("Git Manager:")
        print(f"\tGit origin is {self.configuration.origin}")
        print(f"\tUsing working copy path {self._wc}")
        if self._wc != self.configuration.wc_path:
            print("\tUsing a temporary working copy.")

    @property
    def head_sha(self):
        """Hex SHA of the current HEAD commit, or None if no repository is open."""
        return None if self.repo is None else self.repo.head.object.hexsha

    def pull(self, force=False):
        """Pull from origin.

        Arguments:
            `force` -- Do a pull even though the pull interval has not elapsed

        Returns: True if the pull moved HEAD to a different commit;
                 False if HEAD is unchanged or the pull was skipped because
                 the pull interval has not elapsed yet

        Raises:
            RuntimeError: if called before `setup()` opened a repository.
        """
        if self.repo is None:
            raise RuntimeError("GitManager.setup() must be called before pull()!")

        # Monotonic clock: interval checks must not be affected by
        # wall-clock adjustments.
        now = time.monotonic()
        interval = self.configuration.pull_intv
        if not force and self._last_pull is not None and now - self._last_pull < interval:
            return False

        self._last_pull = now

        old_head = self.head_sha

        # get the origin
        # (We verified during initialization that this origin exists.)
        origin = self.repo.remote('origin')

        # The askpass helper used for cloning is gone by now (and never
        # existed for a reused working copy), so provide a fresh one.
        with self._askpass_env() as env, self.repo.git.custom_environment(**env):
            origin.pull(rebase=True)

        return self.head_sha != old_head