From e9bb14da18b1f9945b171d45df6d09e77d4c4b93 Mon Sep 17 00:00:00 2001
From: matevz_erzen <matevz.erzen@xlab.si>
Date: Fri, 1 Oct 2021 12:37:15 +0200
Subject: [PATCH] Added scheduling and proto files

---
 .gitignore                                    |   1 +
 Dockerfile                                    |   6 +-
 README.md                                     |  58 ++++++++-
 constants/constants.py                        |   6 +-
 entrypoint.sh                                 |   9 ++
 evidence/evidence.py                          |   8 +-
 evidence/evidence_pb2.py                      | 117 ++++++++++++++++++
 proto/evidence.proto                          |  30 +++++
 requirements.txt                              |   7 +-
 scheduler/scheduler.py                        |  35 ++++++
 test.py                                       |   7 --
 wazuh_evidence_collector/wazuh_client.py      |   2 +-
 .../wazuh_evidence_collector.py               |   8 +-
 13 files changed, 270 insertions(+), 24 deletions(-)
 create mode 100755 entrypoint.sh
 create mode 100644 evidence/evidence_pb2.py
 create mode 100644 proto/evidence.proto
 create mode 100644 scheduler/scheduler.py
 delete mode 100644 test.py

diff --git a/.gitignore b/.gitignore
index dd8561b..3d204c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ __pycache__/
 *.pyc
 *$py.class
 .idea/
+dump.rdb
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index d68658f..682c0c4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,11 +2,13 @@
 
 FROM python:3.8-slim-buster
 
-WORKDIR /evidence-collector/
+WORKDIR /evidence-collector
 
 COPY requirements.txt requirements.txt
 RUN pip3 install -r requirements.txt
 
 COPY . .
 
-CMD ["python3", "test.py"]
\ No newline at end of file
+RUN apt-get update && apt-get install -y redis-server
+
+ENTRYPOINT ["entrypoint.sh"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 268a3c0..5afabb4 100644
--- a/README.md
+++ b/README.md
@@ -8,9 +8,11 @@ Wazuh evidence collector uses [Wazuh's API](https://documentation.wazuh.com/curr
 
 ## Installation & use
 
-### Using docker:
+### Using docker
 
-1. Set up your Wazuh development environment. Use [Security Monitoring](https://gitlab.xlab.si/medina/security-monitoring) repository to create and deploy Vagrant box with all required components.
+> Note: Docker image is not yet complete and might not work due to recent changes around scheduler etc.
+
+1. Set up your Wazuh development environment. Use [Security Monitoring](https://gitlab.xlab.si/medina/security-monitoring) repository to create and deploy Vagrant box with all the required components.
 
 2. Clone this repository.
 
@@ -28,7 +30,7 @@ docker run evidence-collector
 
 > Note: Current simple image runs code from `test.py`. If you wish to test anything else, change this file or edit `Dockerfile`.
 
-### Local environment:
+### Local environment
 
 1. Set up your Wazuh development environment. Use [Security Monitoring](https://gitlab.xlab.si/medina/security-monitoring) repository to create and deploy Vagrant box with all required components.
 
@@ -40,13 +42,21 @@ docker run evidence-collector
 pip install -r requirements.txt
 ```
 
-4. Run `test.py`:
+4. Install Redis server (or run it in a separate Docker container - in this case remove server start command from `entrypoint.sh`):
+
+```
+sudo apt-get install redis-server
+```
+
+> Note: To stop Redis server use `/etc/init.d/redis-server stop`.
+
+5. Run `entrypoint.sh`"
 
 ```
-python3 test.py
+./entrypoint.sh
 ```
 
-> Note: This repository consists of multiple modules. When running code manually, use of `-m` flag might be necessary.
+> Note: This repository consists of multiple Python modules. When running Python code manually, use of `-m` flag might be necessary.
 
 ### API User authentication
 
@@ -68,6 +78,42 @@ curl --user admin:changeme --insecure -X GET "https://192.168.33.10:9200/wazuh-a
 }'
 ```
 
+### Running [RQ](https://github.com/rq/rq) and [RQ-scheduler](https://github.com/rq/rq-scheduler) localy
+
+1. Install (if needed) and run `redis-server`:
+
+```
+sudo apt-get install redis-server
+
+redis-server
+```
+
+> Note: By default, server listens on port `6379`. Take this into consideration when starting other components.
+
+2. Install RQ and RQ-scheduler:
+
+```
+pip install rq
+
+pip install rq-scheduler
+```
+
+3. Run both components in 2 terminals:
+
+```
+rqworker low
+
+rqscheduler --host localhost --port 6379
+```
+
+> Note: `low` in the first command references task queue worker will use.
+
+4. Run Python script containing RQ commands as usual:
+
+```
+python3 ...
+```
+
 ## Known issues
 
 ### Python Elasticsearch library problems with ODFE
diff --git a/constants/constants.py b/constants/constants.py
index d4185e3..247023e 100644
--- a/constants/constants.py
+++ b/constants/constants.py
@@ -6,4 +6,8 @@ WAZUH_PASSWORD = 'wazuh-wui'
 ELASTIC_IP = '192.168.33.10'
 ELASTIC_API_PORT = 9200
 ELASTIC_USERNAME = 'admin'
-ELASTIC_PASSWORD = 'changeme'
\ No newline at end of file
+ELASTIC_PASSWORD = 'changeme'
+
+REDIS_IP = 'localhost'
+REDIS_PORT = '6379'
+REDIS_QUEUE_NAME = 'low'
\ No newline at end of file
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100755
index 0000000..32f3c3e
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+redis-server &
+
+rqworker low &
+
+rqscheduler &
+
+python3 -m scheduler.scheduler &
\ No newline at end of file
diff --git a/evidence/evidence.py b/evidence/evidence.py
index e5e906f..7cd6092 100644
--- a/evidence/evidence.py
+++ b/evidence/evidence.py
@@ -2,7 +2,7 @@ import json
 
 class Evidence:
 
-    def __init__(self, evidence_id, timestamp, resource_id, tool, resource_type, feature_type, feature_property, measurement_result, body):
+    def __init__(self, evidence_id, timestamp, resource_id, tool, resource_type, feature_type, feature_property, measurement_result, raw):
         self.evidence_id = evidence_id
         self.timestamp = timestamp
         self.resource_id = resource_id
@@ -11,11 +11,11 @@ class Evidence:
         self.feature_type = feature_type
         self.feature_property = feature_property
         self.measurement_result = measurement_result
-        self.body = body
+        self.raw = raw
 
     def toJson(self):
         return json.dumps(self.__dict__)
 
-def simple_evidence(evidence_id, timestamp, resource_id, measurement_result, body):
-    return Evidence(evidence_id, timestamp, resource_id, None, None, None, None, measurement_result, body)
+def simple_evidence(evidence_id, timestamp, resource_id, measurement_result, raw):
+    return Evidence(evidence_id, timestamp, resource_id, None, None, None, None, measurement_result, raw)
     
\ No newline at end of file
diff --git a/evidence/evidence_pb2.py b/evidence/evidence_pb2.py
new file mode 100644
index 0000000..f97171c
--- /dev/null
+++ b/evidence/evidence_pb2.py
@@ -0,0 +1,117 @@
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# source: proto/evidence.proto
+
+import sys
+_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
+from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
+
+
+DESCRIPTOR = _descriptor.FileDescriptor(
+  name='proto/evidence.proto',
+  package='',
+  syntax='proto3',
+  serialized_options=_b('Z\010evidence'),
+  serialized_pb=_b('\n\x14proto/evidence.proto\x1a\x1cgoogle/protobuf/struct.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"\xc1\x01\n\x08\x45vidence\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\nservice_id\x18\x02 \x01(\t\x12\x13\n\x0bresource_id\x18\x03 \x01(\t\x12-\n\ttimestamp\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x1a\n\x12\x61pplicable_metrics\x18\x05 \x03(\x05\x12\x0b\n\x03raw\x18\x06 \x01(\t\x12(\n\x08resource\x18\x07 \x01(\x0b\x32\x16.google.protobuf.ValueB\nZ\x08\x65videnceb\x06proto3')
+  ,
+  dependencies=[google_dot_protobuf_dot_struct__pb2.DESCRIPTOR,google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,])
+
+
+
+
+_EVIDENCE = _descriptor.Descriptor(
+  name='Evidence',
+  full_name='Evidence',
+  filename=None,
+  file=DESCRIPTOR,
+  containing_type=None,
+  fields=[
+    _descriptor.FieldDescriptor(
+      name='id', full_name='Evidence.id', index=0,
+      number=1, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR),
+    _descriptor.FieldDescriptor(
+      name='service_id', full_name='Evidence.service_id', index=1,
+      number=2, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR),
+    _descriptor.FieldDescriptor(
+      name='resource_id', full_name='Evidence.resource_id', index=2,
+      number=3, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR),
+    _descriptor.FieldDescriptor(
+      name='timestamp', full_name='Evidence.timestamp', index=3,
+      number=4, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR),
+    _descriptor.FieldDescriptor(
+      name='applicable_metrics', full_name='Evidence.applicable_metrics', index=4,
+      number=5, type=5, cpp_type=1, label=3,
+      has_default_value=False, default_value=[],
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR),
+    _descriptor.FieldDescriptor(
+      name='raw', full_name='Evidence.raw', index=5,
+      number=6, type=9, cpp_type=9, label=1,
+      has_default_value=False, default_value=_b("").decode('utf-8'),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR),
+    _descriptor.FieldDescriptor(
+      name='resource', full_name='Evidence.resource', index=6,
+      number=7, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR),
+  ],
+  extensions=[
+  ],
+  nested_types=[],
+  enum_types=[
+  ],
+  serialized_options=None,
+  is_extendable=False,
+  syntax='proto3',
+  extension_ranges=[],
+  oneofs=[
+  ],
+  serialized_start=88,
+  serialized_end=281,
+)
+
+_EVIDENCE.fields_by_name['timestamp'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP
+_EVIDENCE.fields_by_name['resource'].message_type = google_dot_protobuf_dot_struct__pb2._VALUE
+DESCRIPTOR.message_types_by_name['Evidence'] = _EVIDENCE
+_sym_db.RegisterFileDescriptor(DESCRIPTOR)
+
+Evidence = _reflection.GeneratedProtocolMessageType('Evidence', (_message.Message,), dict(
+  DESCRIPTOR = _EVIDENCE,
+  __module__ = 'proto.evidence_pb2'
+  # @@protoc_insertion_point(class_scope:Evidence)
+  ))
+_sym_db.RegisterMessage(Evidence)
+
+
+DESCRIPTOR._options = None
+# @@protoc_insertion_point(module_scope)
diff --git a/proto/evidence.proto b/proto/evidence.proto
new file mode 100644
index 0000000..ddd6159
--- /dev/null
+++ b/proto/evidence.proto
@@ -0,0 +1,30 @@
+syntax = "proto3";
+
+import "google/protobuf/struct.proto";
+import "google/protobuf/timestamp.proto";
+
+option go_package = "evidence";
+
+// TODO
+// Coppied from https://github.com/clouditor/clouditor/blob/main/proto/evidence.proto
+message Evidence {
+    string id = 1;
+  
+    string service_id = 2;
+  
+    string resource_id = 3;
+  
+    // TODO: replace with google/type/date.proto timestamp.proto or date.proto?
+    google.protobuf.Timestamp timestamp = 4;
+  
+    repeated int32 applicable_metrics = 5;
+  
+    // "raw" evidence (for the auditor), for example the raw JSON response from
+    // the API. This does not follow a defined schema
+    string raw = 6;
+  
+    // optional; a semantic representation of the Cloud resource according to our
+    // defined ontology. a JSON serialized node of our semantic graph. This may be
+    // Clouditor-specific.
+    google.protobuf.Value resource = 7;
+  }
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index d63607f..97964fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,6 @@
-elasticsearch_dsl==7.4.0
-urllib3==1.25.8
 elasticsearch==7.13.4
+urllib3==1.25.8
+redis==3.5.3
+elasticsearch_dsl==7.4.0
+rq==1.10.0
+rq_scheduler==0.11.0
diff --git a/scheduler/scheduler.py b/scheduler/scheduler.py
new file mode 100644
index 0000000..5f5c0ee
--- /dev/null
+++ b/scheduler/scheduler.py
@@ -0,0 +1,35 @@
+from redis import Redis
+from rq import Queue
+from rq_scheduler import Scheduler
+from constants import constants
+from wazuh_evidence_collector import wazuh_evidence_collector
+
+def remove_jobs(scheduler):
+    jobs = scheduler.get_jobs()
+    for job in jobs:
+        scheduler.cancel(job)
+
+def print_jobs(scheduler):
+    jobs = scheduler.get_jobs()
+    for job in jobs:
+        print(job)
+
+redis = Redis(constants.REDIS_IP, constants.REDIS_PORT)
+q = Queue(constants.REDIS_QUEUE_NAME, connection=redis)
+scheduler = Scheduler(connection=redis)
+
+# TODO: Remove if needed
+remove_jobs(scheduler)
+
+# TODO: Change cron expression and repeat value for production verion.
+scheduler.cron(
+    '* * * * * ',               
+    func=wazuh_evidence_collector.run_full_check,                 
+    args=[],
+    repeat=10,
+    queue_name='low',
+    use_local_timezone=False
+)
+
+# TODO: Remove if needed
+print_jobs(scheduler)
diff --git a/test.py b/test.py
deleted file mode 100644
index 4c76f3a..0000000
--- a/test.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import pprint
-from  wazuh_evidence_collector.wazuh_evidence_collector import *
-
-evidences = run_full_check()
-
-for evidence in evidences:
-    pprint.pprint(evidence.__dict__)
diff --git a/wazuh_evidence_collector/wazuh_client.py b/wazuh_evidence_collector/wazuh_client.py
index fe1eb31..d002751 100644
--- a/wazuh_evidence_collector/wazuh_client.py
+++ b/wazuh_evidence_collector/wazuh_client.py
@@ -11,7 +11,7 @@ class WazuhClient:
 		self._auth_token = None
 	
 	def req(self, method, resource, data=None, headers={}, auth_retry=True):
-		# TODO add cert verification
+		# TODO: add cert verification
 		c = urllib3.HTTPSConnectionPool(self._ip, port=self._port, cert_reqs='CERT_NONE', assert_hostname=False)
 		url = "https://%s:%i/%s" % (self._ip, self._port, resource)
 		
diff --git a/wazuh_evidence_collector/wazuh_evidence_collector.py b/wazuh_evidence_collector/wazuh_evidence_collector.py
index 110588a..9372fd0 100644
--- a/wazuh_evidence_collector/wazuh_evidence_collector.py
+++ b/wazuh_evidence_collector/wazuh_evidence_collector.py
@@ -19,6 +19,7 @@ es = Elasticsearch(
         ssl_show_warn=False,
     )
 
+# TODO: Get real data.
 # Get (temporary) ID
 def get_id(reqId):
     return reqId + '-' + str(randint(0, maxsize))
@@ -50,6 +51,11 @@ def run_full_check():
         agent_evidences.append(wazuh_monitoring_enabled(wc, agent))
         agent_evidences.append(malvare_protection_enabled(wc, es, agent))
 
+    # TODO: : Remove for production. This is only output for easier local testing.
+    for evidence in agent_evidences:
+        pprint.pprint(evidence.toJson())
+
+    
     return agent_evidences
 
 # Check Wazuh's configuration
@@ -181,7 +187,7 @@ def malvare_protection_enabled(wc, es, agent_id):
         return simple_evidence(get_id('05.4'), get_timestamp(), agent_id, "false", raw_evidence)
 
 # Check last Syscheck & Rootcheck scan times
-# When producing 'real' evidence, make sure to provide differentiation between Syscheck and Rootcheck outputs.
+# TODO: When producing 'real' evidence, make sure to provide differentiation between Syscheck and Rootcheck outputs.
 def check_last_scan_time(wc, agent_id):
     body = wc.req('GET', 'syscheck/' + agent_id + '/last_scan')
 
-- 
GitLab