diff options
-rw-r--r-- | ansible/roles/postgres/files/schema.sql | 104 | ||||
-rw-r--r-- | build/schema.sql | 104 | ||||
-rw-r--r-- | doc/api-2.0.rst | 331 | ||||
-rw-r--r-- | templating/README.md | 70 | ||||
-rwxr-xr-x | templating/templating.py | 37 | ||||
-rw-r--r-- | web/templates/test.conf | 17 |
6 files changed, 470 insertions, 193 deletions
diff --git a/ansible/roles/postgres/files/schema.sql b/ansible/roles/postgres/files/schema.sql index 60b312b..ef749b2 100644 --- a/ansible/roles/postgres/files/schema.sql +++ b/ansible/roles/postgres/files/schema.sql @@ -2,8 +2,8 @@ -- PostgreSQL database dump -- --- Dumped from database version 9.6.10 --- Dumped by pg_dump version 9.6.10 +-- Dumped from database version 9.6.11 +-- Dumped by pg_dump version 9.6.11 SET statement_timeout = 0; SET lock_timeout = 0; @@ -16,14 +16,14 @@ SET client_min_messages = warning; SET row_security = off; -- --- Name: plpgsql; Type: EXTENSION; Schema: -; Owner: +-- Name: plpgsql; Type: EXTENSION; Schema: -; Owner: -- CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog; -- --- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: +-- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: -- COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language'; @@ -150,11 +150,11 @@ ALTER SEQUENCE public.linknets_linknet_seq OWNED BY public.linknets.linknet; -- --- Name: metrics; Type: TABLE; Schema: public; Owner: postgres +-- Name: metrics; Type: TABLE; Schema: public; Owner: nms -- CREATE TABLE public.metrics ( - ts timestamp with time zone DEFAULT now(), + "time" timestamp with time zone DEFAULT now(), src text, metadata jsonb, data jsonb @@ -462,6 +462,13 @@ CREATE INDEX dhcp_time ON public.dhcp USING btree ("time"); -- +-- Name: metric_data; Type: INDEX; Schema: public; Owner: nms +-- + +CREATE INDEX metric_data ON public.metrics USING gin (data); + + +-- -- Name: ping_brin_time; Type: INDEX; Schema: public; Owner: nms -- @@ -640,84 +647,6 @@ ALTER TABLE ONLY public.ping -- --- Name: TABLE config; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.config TO dhcptail; - - --- --- Name: TABLE dhcp; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.dhcp TO dhcptail; - - --- --- Name: TABLE linknet_ping; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE 
public.linknet_ping TO dhcptail; - - --- --- Name: TABLE linknets; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.linknets TO dhcptail; - - --- --- Name: TABLE metrics; Type: ACL; Schema: public; Owner: postgres --- - -GRANT ALL ON TABLE public.metrics TO nms; - - --- --- Name: TABLE networks; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.networks TO dhcptail; - - --- --- Name: TABLE oplog; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.oplog TO dhcptail; - - --- --- Name: TABLE ping; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.ping TO dhcptail; - - --- --- Name: TABLE ping_secondary_ip; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.ping_secondary_ip TO dhcptail; - - --- --- Name: TABLE seen_mac; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.seen_mac TO dhcptail; - - --- --- Name: TABLE snmp; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.snmp TO postgres; -GRANT ALL ON TABLE public.snmp TO dhcptail; - - --- -- Name: SEQUENCE snmp_id_seq; Type: ACL; Schema: public; Owner: nms -- @@ -725,13 +654,6 @@ GRANT ALL ON SEQUENCE public.snmp_id_seq TO postgres; -- --- Name: TABLE switches; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.switches TO dhcptail; - - --- -- PostgreSQL database dump complete -- diff --git a/build/schema.sql b/build/schema.sql index 60b312b..ef749b2 100644 --- a/build/schema.sql +++ b/build/schema.sql @@ -2,8 +2,8 @@ -- PostgreSQL database dump -- --- Dumped from database version 9.6.10 --- Dumped by pg_dump version 9.6.10 +-- Dumped from database version 9.6.11 +-- Dumped by pg_dump version 9.6.11 SET statement_timeout = 0; SET lock_timeout = 0; @@ -16,14 +16,14 @@ SET client_min_messages = warning; SET row_security = off; -- --- Name: plpgsql; Type: EXTENSION; Schema: -; Owner: +-- Name: plpgsql; Type: EXTENSION; Schema: -; Owner: -- CREATE EXTENSION 
IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog; -- --- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: +-- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: -- COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language'; @@ -150,11 +150,11 @@ ALTER SEQUENCE public.linknets_linknet_seq OWNED BY public.linknets.linknet; -- --- Name: metrics; Type: TABLE; Schema: public; Owner: postgres +-- Name: metrics; Type: TABLE; Schema: public; Owner: nms -- CREATE TABLE public.metrics ( - ts timestamp with time zone DEFAULT now(), + "time" timestamp with time zone DEFAULT now(), src text, metadata jsonb, data jsonb @@ -462,6 +462,13 @@ CREATE INDEX dhcp_time ON public.dhcp USING btree ("time"); -- +-- Name: metric_data; Type: INDEX; Schema: public; Owner: nms +-- + +CREATE INDEX metric_data ON public.metrics USING gin (data); + + +-- -- Name: ping_brin_time; Type: INDEX; Schema: public; Owner: nms -- @@ -640,84 +647,6 @@ ALTER TABLE ONLY public.ping -- --- Name: TABLE config; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.config TO dhcptail; - - --- --- Name: TABLE dhcp; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.dhcp TO dhcptail; - - --- --- Name: TABLE linknet_ping; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.linknet_ping TO dhcptail; - - --- --- Name: TABLE linknets; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.linknets TO dhcptail; - - --- --- Name: TABLE metrics; Type: ACL; Schema: public; Owner: postgres --- - -GRANT ALL ON TABLE public.metrics TO nms; - - --- --- Name: TABLE networks; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.networks TO dhcptail; - - --- --- Name: TABLE oplog; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.oplog TO dhcptail; - - --- --- Name: TABLE ping; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.ping TO dhcptail; - - --- --- Name: TABLE 
ping_secondary_ip; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.ping_secondary_ip TO dhcptail; - - --- --- Name: TABLE seen_mac; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.seen_mac TO dhcptail; - - --- --- Name: TABLE snmp; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.snmp TO postgres; -GRANT ALL ON TABLE public.snmp TO dhcptail; - - --- -- Name: SEQUENCE snmp_id_seq; Type: ACL; Schema: public; Owner: nms -- @@ -725,13 +654,6 @@ GRANT ALL ON SEQUENCE public.snmp_id_seq TO postgres; -- --- Name: TABLE switches; Type: ACL; Schema: public; Owner: nms --- - -GRANT ALL ON TABLE public.switches TO dhcptail; - - --- -- PostgreSQL database dump complete -- diff --git a/doc/api-2.0.rst b/doc/api-2.0.rst new file mode 100644 index 0000000..4d76098 --- /dev/null +++ b/doc/api-2.0.rst @@ -0,0 +1,331 @@ +New approach to Gondul API +========================== + +The current api is split in three/four: + +- /api/read - read/only access for sensitive data +- /api/public - read/only access for public data +- /api/write - write-only (authenticated) +- /templating or similar - for templating (read/sort-of-write-but-not-quite, sensitive) + +Today +----- + +(/a/ = /api/, /a/w/ = /api/write/, etc) + +- "all" API endpoints for reading data support ?when=(date) to adjust what + the definition of "now" is, to enable historic review. All end-points + return ETags calculated using the content of the data returned. + +- /a/p/config - this provides information mainly used to determine if this + is the public variant or not. There was an idea originally to extend this + with more configuration-data, but it never materialized. + +- /a/p/dhcp - returns the dhcp-specific data for each network/traffic VLAN, + used. Returns both the most recent timestamp and a count of seen leases. + +- /a/p/dhcp-summary - returns total number of dhcp leases seen recently. + Used mainly to show total number of active clients. 
+ +- /a/p/distro-tree - returns two structures, distro-tree-phy maps distros + and their physical ports to access switches ("distro5": { "ge-0/0/4": + "e13-1" } ), and "distro-tree-sys" maps distros to sysnames and ports + ("distro5": { "e13-1": "ge-0/0/4" } ). Required to be able to easily look + up both how ports are connected and how switches are connected. + +- /a/p/location - Uses the source-ip of the request to return a HTML page + that determines which switch the request is made for. Used for "dhcp + testing"/"dhcp-løp" to ensure switches are actually hooked up correctly + (e.g.: Hook up to a switch, visit the page, verify that what it tells you + matches the physical label of the switch) + +- /a/p/ping - Returns latency stats for all switches + +- /a/p/switches - Returns a subset of the information we have for all + switches. Only returns public-data. + +- /a/p/switch-state - Returns a subset of data from SNMP, parsed and + filtered, including summaries for port groups like "clients" vs + "uplinks". Has a good bit of logic for filtering what should and + shouldn't be shown to the general public. + +- /a/r/networks - lists all networks/vlans/layer2 domains we have + +- /a/r/oplog - shows the oplog + +- /a/r/snmp - show raw snmp-data, no filtering + +- /a/r/switches-management - shows config for switches - the unfiltered + variant of /a/p/switches + +- /a/r/template-list - lists all available templates + +- /a/w/collector - simple skogul interface written to receive DHCP log + data. Should be replaced by an actual skogul instance. + +- /a/w/config - write-endpoint for updating event config, rarely used. + +- /a/w/networks - add/update networks + +- /a/w/oplog - add oplog entries + +- /a/w/switches - add/update switches + +Changes +------- + +The big changes suggested are: + +- Remove public interfaces from the "native" API. Consider adding public + nms as a filtered variant on top instead. + +- Do not use paths to distinguish write from read. 
+ +- Do not natively deliver "two" data sets. If rates are needed, make that + instead. + +- Create a new "ifmetrics" concept to extract interface metrics from SNMP + data, since it can also come from telemetry and other sources. + +- Leave all "rate" calculation out of the API. Instead, add integration + with influxdb under, e.g., /api/rates. + +- Option: Support ?then=now-5m or similar, which will then be cacheable, + and the client can then do two requests (implicit ?then=now and a + ?then=now-1m) and compare. + + +Actual suggestion +----------------- + +I'm fairly convinced about: + +- /api/switches - Read/write interface for getting, updating and adding + switches. Read interface should be as identical to write interface as + possible. + +- /api/switches/some-switch - Similar, but for a single switch. + +- /api/networks - Ditto as switches + +- /api/networks/some-net - Similar, but for a single net + +- /api/oplog - Ditto + +- /api/snmp - GET all SNMP data available. + +- /api/snmp/some-switch - Get all SNMP data for a single switch + +I'm somewhat convinced about: + +- /api/ifmetrics - Get all interface metrics - regardless of source. Also + integrates the logic of "switch-state". If possible: Get "rates" for + relevant counters. + +- /api/ifmetrics/some-switch - Get all interface metrics for a single + switch + +- /api/ifmetrics/some-switch/port - Get metrics for a specific interface + for a specific switch. + +Less sure: + +- /api/templates/ - List all templates (in JSON format) + +- /api/templates/some-template - GET uncompiled template. Should optionally + support "Accept: application/json" to provide the data json-encoded as + well as "Accept: text/plain" for plain text/raw (default). + +- /templating/ GET the compiled template (uses templating.py) + +- /api/collector/{name} - POST url for relevant collector. Uses Skogul + JSON format (and implementation). 
+ +- /api/collector/{dhcp,snmp,telemetry,ping,generic} - Some examples, where + "generic" will allow us to accept any data, and just stick it in some + general-purpose format or something. I have some more ideas about that. + +We could also consider implementing https://grafana.com/grafana/plugins/grafana-simple-json-datasource + +Progress +-------- + +We should get a basic API up in GO pretty fast, focusing on a single +end-point and get it right. E.g.: Get /api/switches right from the start. +All the 1-to-1 API-to-DB-table interfaces should be pretty much identical +code-wise. + +Next up is probably ping, simply because it is, well, simple. It means +re-factoring the collector to do HTTP POST, but that's a minor issue. + +Then I believe tackling SNMP and interfaces is important. + +Ifmetrics example +----------------- + +Interface metrics should be agnostic to SNMP vs Telemetry vs Magic. It will +therefore have a subset of curated fields. A spec needs to be written and +maintained that defines what is and isn't REQUIRED, so front-ends can +gracefully reduce functionality. + +Example, which WILL change during implementation:: + + { + "e13-1": { + "ge-0/0/1": { + "name": "ge-0/0/1", + "snmp_if_index": 1234, + "ifHighSpeed": 10000, + "if_operational_status": "UP", + "parent_ae_name": "ae95", + "description": "alias|name?", + "ingress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "egress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "rates": { + "ingress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "egress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + } + } + }, + "ge-0/0/2": {....} + }, + "e15-1": { + "ge-0/0/1": { + "name": "ge-0/0/1", + "snmp_if_index": 1234, + "ifHighSpeed": 10000, + "if_operational_status": "UP", + "parent_ae_name": "ae95", + "description": "alias|name?", + "ingress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... 
+ }, + "egress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "rates": { + "ingress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "egress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + } + } + }, + "ge-0/0/2": {....} + } + } + +Requesting /api/ifmetrics/e15-1 would give:: + + { + "ge-0/0/1": { + "name": "ge-0/0/1", + "snmp_if_index": 1234, + "ifHighSpeed": 10000, + "if_operational_status": "UP", + "parent_ae_name": "ae95", + "description": "alias|name?", + "ingress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "egress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "rates": { + "ingress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "egress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + } + } + }, + "ge-0/0/2": {....} + } + +And /api/ifmetrics/e15-1/ge-0/0/1 :: + + { + "name": "ge-0/0/1", + "snmp_if_index": 1234, + "ifHighSpeed": 10000, + "if_operational_status": "UP", + "parent_ae_name": "ae95", + "description": "alias|name?", + "ingress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "egress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "rates": { + "ingress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + }, + "egress": { + "octets": 125, + "errors": 5, + "discards": 0, + ... + } + } + } + +Some issues remain: There should be an idea of totals, for convenience. +Some metadata regarding precision of rates (e.g.: number of measurements or +something), and various other enrichments. So the exact details here might +need some refinement. + diff --git a/templating/README.md b/templating/README.md new file mode 100644 index 0000000..89f472e --- /dev/null +++ b/templating/README.md @@ -0,0 +1,70 @@ +# Templating Engine + +This engine does the templating for The Gathering. + +The flask server sits behind varnish and waits for incoming GET requests with a template name and optional variables. 
+ +```varnish +backend templating { + .host = "::1"; + .port = "8081"; +} +.... +if (req.url ~ "^/api/templates") { + set req.url = regsub(req.url,"^/api/templates",""); + set req.backend_hint = templating; +} +``` + +## Requirements + +* Python3.6 +* jinja2 +* requests +* flask +* netaddr + +## Settings + +``` +python3 templating.py +usage: templating.py [-t TEMPLATES [TEMPLATES ...]] [-h HOST] [-p PORT] [-d] + [-s SERVER] [-x TIMEOUT] + +Process templates for gondul. + +optional arguments: + -t TEMPLATES [TEMPLATES ...], --templates TEMPLATES [TEMPLATES ...] + location of templates + -h HOST, --host HOST host address + -p PORT, --port PORT host port + -d, --debug enable debug mode + -s SERVER, --server SERVER + gondul server address + -x TIMEOUT, --timeout TIMEOUT + gondul server timeout +``` + +## How to test locally + +You need a directory with all the jinja2 templates. I just assume you git cloned the entire gondul repo. + +An example using [test.conf](../web/templates/test.conf) + +```bash +python3 templating.py --host ::1 --port 8081 --templates ../web/templates --server http://tech:rules@<gondul>:80 +``` + +```bash +curl -s "http://[::1]:8081/test.conf?switch=e1-1" | jq . 
+{ + "distro_name": "core-dev", + "placement": { + "height": 20, + "width": 250, + "x": "830", + "y": "620" + }, + "tags": [] +} +``` diff --git a/templating/templating.py b/templating/templating.py index 87853b6..1eff4a3 100755 --- a/templating/templating.py +++ b/templating/templating.py @@ -8,20 +8,26 @@ import netaddr import requests from flask import Flask, request -from jinja2 import Environment, FileSystemLoader, TemplateNotFound +from jinja2 import Environment, FileSystemLoader, TemplateNotFound, TemplateError -endpoints = "read/networks read/oplog read/snmp read/switches-management public/distro-tree public/config public/dhcp public/dhcp-summary public/ping public/switches public/switch-state".split() +endpoints = ["read/networks", "read/oplog", "read/snmp", "read/switches-management", "public/distro-tree", + "public/config", "public/dhcp", "public/dhcp-summary", "public/ping", "public/switches", + "public/switch-state"] objects = {} -def getEndpoint(endpoint): - r = requests.get("http://localhost:80/api/{}".format(endpoint)) - if r.status_code != 200: - raise Exception("Bad status code for endpoint {}: {}".format(endpoint, r.status_code)) - return r.json() +def getEndpoint(endpoint: str) -> dict: + """ + Fetches an endpoint and returns the data as a dict. 
+ """ + uri = f"{args.server}/api/{endpoint}" + r = requests.get(uri, timeout=args.timeout) + r.raise_for_status() + return r.json() + def updateData(): for a in endpoints: objects[a] = getEndpoint(a) @@ -52,14 +58,20 @@ def add_header(response): @app.route("/<path>", methods=["GET"]) def root_get(path): - updateData() try: + updateData() template = env.get_template(path) body = template.render(objects=objects, options=request.args) + except (requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError) as error: + return f'Timeout or connection error from gondul: {err}', 500 except TemplateNotFound: - return 'Template "{}" not found\n'.format(path), 404 + return f'Template "{path}" not found\n', 404 + except TemplateError as err: + return f'Templating of "{path}" failed to render. Most likely due to an error in the template. Error transcript:\n\n{err}\n----\n\n{traceback.format_exc()}\n', 400 + except requests.exceptions.HTTPError as err: + return f'HTTP error from gondul: {err}', 500 except Exception as err: - return 'Templating of "{}" failed to render. Most likely due to an error in the template. Error transcript:\n\n{}\n----\n\n{}\n'.format(path, err, traceback.format_exc()), 400 + return f'Uncaught error: {err}', 500 return body, 200 @@ -71,7 +83,7 @@ def root_post(path): template = env.from_string(content.decode("utf-8")) body = template.render(objects=objects, options=request.args) except Exception as err: - return 'Templating of "{}" failed to render. Most likely due to an error in the template. Error transcript:\n\n{}\n----\n\n{}\n'.format(path, err, traceback.format_exc()), 400 + return 'Templating of "{path}" failed to render. Most likely due to an error in the template. 
Error transcript:\n\n{err}\n----\n\n{traceback.format_exc()}\n', 400 return body, 200 @@ -80,11 +92,14 @@ parser.add_argument("-t", "--templates", type=str, nargs="+", help="location of parser.add_argument("-h", "--host", type=str, default="127.0.0.1", help="host address") parser.add_argument("-p", "--port", type=int, default=8080, help="host port") parser.add_argument("-d", "--debug", action="store_true", help="enable debug mode") +parser.add_argument("-s", "--server", type=str, default="http://localhost:80", help="gondul server address") +parser.add_argument("-x", "--timeout", type=int, default=2, help="gondul server timeout") args = parser.parse_args() env.loader.searchpath = args.templates if not sys.argv[1:]: parser.print_help() + sys.exit(1) app.run(host=args.host, port=args.port, debug=args.debug) diff --git a/web/templates/test.conf b/web/templates/test.conf new file mode 100644 index 0000000..ce2fe0d --- /dev/null +++ b/web/templates/test.conf @@ -0,0 +1,17 @@ +{# Fetches something simple from gondul #} +{# Query parameters: ?switch=e1-1 #} + +{# Check if ?switch option is given#} +{% if not options["switch"] %} + {# pretty print public/switches endpoint#} + {{ objects["public/switches"] | pprint }} +{% else %} + {# sets sw variable using the query parameter #} + {% set sw = options["switch"] %} +{% endif %} +{% if sw %} + {# find the correct switch from public/switches api endpoint using query parameter#} + {% set switch = objects["public/switches"].switches[sw] %} + {# pretty print info in api #} + {{ switch | tojson }} +{% endif %} |