Commit 83b52ef2 authored by Etxaniz Errazkin, Iñaki

push to public

!target/transforming-xlsx.jar
CHANGELOG.md merge=union
*.iml
.vertx/
logs/
target/
.idea/
nbactions.xml
nb-configuration.xml
*.bat
FROM maven:3.6.3-openjdk-14-slim AS builder
WORKDIR /home/app
COPY pom.xml .
RUN mvn dependency:go-offline
COPY src src/
RUN mvn clean package
FROM openjdk:14-jdk
ENV VERTICLE_FILE transforming-xlsx.jar
# Set the location of the verticles
ENV VERTICLE_HOME /usr/verticles
RUN mkdir $VERTICLE_HOME
EXPOSE 8080
COPY --from=builder /home/app/target/$VERTICLE_FILE $VERTICLE_HOME/$VERTICLE_FILE
RUN groupadd vertx && useradd -g vertx vertx
RUN chown -R vertx $VERTICLE_HOME
RUN chmod -R g+w $VERTICLE_HOME
USER vertx
# Launch the verticle
WORKDIR $VERTICLE_HOME
ENTRYPOINT ["sh", "-c"]
CMD ["exec java $JAVA_OPTS -jar $VERTICLE_FILE"]
Copyright 2019 Fraunhofer FOKUS
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# Transforming XLSX
Microservice for transforming XLSX file data to JSON.
Each sheet is mapped to an array of rows under its (lowercased) sheet name; each row is an array of cell values. Boolean and numeric cells keep their type, all other cells are rendered as strings.
Example of a result:
```json
{
  "sheet_1": [
    [ "ID", "Name", "Age" ],
    [ "1", "Jane Doe", 29 ],
    [ "2", "John Doe", 35 ]
  ],
  "sheet_2": [
    ...
  ]
}
```
## Table of Contents
1. [Build](#build)
1. [Run](#run)
1. [Configuration](#configuration)
1. [Pipe](#pipe)
1. [License](#license)
## Build
Requirements:
* Git
* Docker
```bash
$ git clone <repo-uri>
$ cd <repo>
$ docker build -t piveau/<service-name> .
```
## Run
```bash
$ docker run -it -p 8080:8080 piveau/<service-name>
```
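The image starts the verticle with `$JAVA_OPTS` (see the Dockerfile), so JVM options can be passed at run time. A minimal sketch, assuming you want to cap the heap size:
```bash
$ docker run -it -p 8080:8080 -e JAVA_OPTS="-Xmx512m" piveau/<service-name>
```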
## Configuration
### Pipe
_optional_
* `skipEmptyRows`
  Flag indicating whether to skip empty rows of a sheet instead of including them as empty arrays. Defaults to `false`, i.e. empty rows are included; see the example below.
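A minimal sketch of a pipe segment `config` object that enables the flag (how this object is embedded in the overall pipe descriptor depends on your piveau pipe setup):
```json
{
  "skipEmptyRows": true
}
```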
## License
[Apache License, Version 2.0](LICENSE.md)
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>io.piveau.consus</groupId>
<artifactId>transforming-xlsx</artifactId>
<version>1.0.0</version>
<name>piveau-consus-transforming-xlsx</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.build.timestamp.format>yyyy-MM-dd HH:mm</maven.build.timestamp.format>
<maven.compiler.release>11</maven.compiler.release>
<buildTimestamp>${maven.build.timestamp}</buildTimestamp>
<vertx.version>4.0.3</vertx.version>
<vertx.verticle>io.piveau.transforming.MainVerticle</vertx.verticle>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>io.vertx</groupId>
<artifactId>vertx-stack-depchain</artifactId>
<version>${vertx.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<repositories>
<repository>
<id>paca</id>
<name>paca</name>
<url>https://paca.fokus.fraunhofer.de/repository/maven-public/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>io.vertx</groupId>
<artifactId>vertx-core</artifactId>
</dependency>
<dependency>
<groupId>io.vertx</groupId>
<artifactId>vertx-config</artifactId>
</dependency>
<dependency>
<groupId>io.piveau.utils</groupId>
<artifactId>piveau-utils</artifactId>
<version>7.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>io.piveau.pipe</groupId>
<artifactId>pipe-connector</artifactId>
<version>5.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>5.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.0.0</version>
</dependency>
<dependency>
<groupId>org.codehaus.janino</groupId>
<artifactId>janino</artifactId>
<version>3.1.3</version>
</dependency>
<dependency>
<groupId>net.logstash.logback</groupId>
<artifactId>logstash-logback-encoder</artifactId>
<version>6.6</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.3</version>
</dependency>
</dependencies>
<build>
<finalName>${project.artifactId}</finalName>
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
</plugin>
<plugin>
<groupId>io.reactiverse</groupId>
<artifactId>vertx-maven-plugin</artifactId>
<version>1.0.23</version>
<executions>
<execution>
<id>vmp</id>
<goals>
<goal>initialize</goal>
<goal>package</goal>
</goals>
</execution>
</executions>
<configuration>
<redeploy>true</redeploy>
</configuration>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.0.0-M5</version>
</plugin>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.6</version>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<phase>test</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
package io.piveau.transforming;
import io.piveau.pipe.connector.PipeConnector;
import io.vertx.core.AbstractVerticle;
import io.vertx.core.DeploymentOptions;
import io.vertx.core.Launcher;
import io.vertx.core.Promise;
import java.util.Arrays;
public class MainVerticle extends AbstractVerticle {
@Override
public void start(Promise<Void> startPromise) {
vertx.deployVerticle(XlsxTransformingVerticle.class, new DeploymentOptions().setWorker(true).setWorkerPoolSize(100))
.compose(id -> PipeConnector.create(vertx))
.onSuccess(connector -> {
connector.publishTo(XlsxTransformingVerticle.ADDRESS);
startPromise.complete();
})
.onFailure(startPromise::fail);
}
public static void main(String[] args) {
String[] params = Arrays.copyOf(args, args.length + 1);
params[params.length - 1] = MainVerticle.class.getName();
Launcher.executeCommand("run", params);
}
}
package io.piveau.transforming;
import io.piveau.pipe.PipeContext;
import io.vertx.core.AbstractVerticle;
import io.vertx.core.Promise;
import io.vertx.core.eventbus.Message;
import io.vertx.core.json.JsonArray;
import io.vertx.core.json.JsonObject;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
public class XlsxTransformingVerticle extends AbstractVerticle {
private final Logger log = LoggerFactory.getLogger(getClass());
public static final String ADDRESS = "io.piveau.pipe.transformation.xlsx.queue";
@Override
public void start(Promise<Void> startPromise) {
vertx.eventBus().consumer(ADDRESS, this::handlePipe);
startPromise.complete();
}
private void handlePipe(Message<PipeContext> message) {
PipeContext pipeContext = message.body();
pipeContext.log().trace("Incoming pipe");
JsonObject info = pipeContext.getDataInfo();
try (InputStream input = new ByteArrayInputStream(pipeContext.getBinaryData());
XSSFWorkbook workbook = new XSSFWorkbook(input)) {
JsonObject result = new JsonObject();
// Empty rows are included as empty arrays unless skipEmptyRows is set (default false, see README).
boolean skipEmptyRows = pipeContext.getConfig().getBoolean("skipEmptyRows", false);
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
Sheet sheet = workbook.getSheetAt(i);
JsonArray sheetArray = new JsonArray();
// getLastRowNum() is inclusive, so iterate up to and including the last row.
for (int j = sheet.getFirstRowNum(); j <= sheet.getLastRowNum(); j++) {
Row row = sheet.getRow(j);
if (row != null) {
JsonArray rowArray = new JsonArray();
row.cellIterator().forEachRemaining(cell -> rowArray.add(cellToObject(cell)));
if (!rowArray.isEmpty() || !skipEmptyRows)
sheetArray.add(rowArray);
}
}
result.put(sheet.getSheetName().toLowerCase(), sheetArray);
}
pipeContext.log().debug("Transformation result:\n{}", result.encodePrettily());
pipeContext.setResult(result.encodePrettily(), "application/json", info).forward();
pipeContext.log().info("Data transformed: {}", info);
} catch (IOException e) {
log.error("transforming data", e);
pipeContext.log().error(info.toString(), e);
}
}
private Object cellToObject(Cell cell) {
switch (cell.getCellType()) {
case BOOLEAN:
return cell.getBooleanCellValue();
case NUMERIC:
return cell.getNumericCellValue();
case STRING:
default:
return cell.getStringCellValue();
}
}
}
{
"timestamp": "${buildTimestamp}",
"version": "${project.version}"
}
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://io.piveau/resources/transforming-js/config.schema.json",
"title": "transforming-js configuration",
"description": "piveau pipe segment config object schema",
"type": "object",
"oneOf": [
{
"required": [ "scriptRepository" ]
},
{
"required": [ "script" ]
}
],
"properties": {
"oneOf": {
"scriptRepository": {
"type": "object",
"title": "The repository where the script resides",
"required": [ "uri", "script" ],
"properties": {
"uri": {
"type": "string",
"format": "uri",
"title": "The URI reference to the repository"
},
"branch": {
"type": "string",
"title": "The branch to use",
"default": "master"
},
"username": {
"type": "string",
"title": "The username to authenticate"
},
"token": {
"type": "string",
"title": "The authentication token"
},
"script": {
"type": "string",
"title": "Path to script file"
}
}
},
"script": {
"type": "string",
"title": "The embedded script"
},
"single": {
"type": "boolean",
"default": false
},
"params": {
"type": "string",
"title": "Params as stringified json"
},
"outputFormat": {
"enum": [
"application/rdf+xml",
"application/n-triples",
"application/ld+json",
"application/trig",
"text/turtle",
"text/n3"
],
"title": "The output format",
"default": "application/n-triples"
}
}
}
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<property name="pipeAppender" value="${PIVEAU_PIPE_LOG_APPENDER:-STDOUT}" />
<if condition='property("pipeAppender").equals("LOGSTASH")'>
<then>
<appender name="LOGSTASH" class="net.logstash.logback.appender.LogstashTcpSocketAppender">
<destination>${PIVEAU_LOGSTASH_HOST:-logstash}:${PIVEAU_LOGSTASH_PORT:-5044}</destination>
<encoder class="net.logstash.logback.encoder.LogstashEncoder" />
</appender>
</then>
</if>
<if condition='property("pipeAppender").equals("PIPEFILE")'>
<then>
<appender name="PIPEFILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<prudent>true</prudent>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${PIVEAU_PIPE_LOG_PATH:-logs/piveau-pipe.%d{yyyy-MM-dd}.log}</fileNamePattern>
<!-- keep 10 days' worth of history capped at 1GB total size -->
<maxHistory>10</maxHistory>
<totalSizeCap>1GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>[%d{yyyy-MM-dd HH:mm:ss.SSS}] %-5level %logger %msg%n</pattern>
</encoder>
</appender>
</then>
</if>
<logger name="com.mchange.v2" level="warn"/>
<logger name="io.netty" level="warn"/>
<logger name="io.vertx" level="info"/>
<logger name="com.hazelcast" level="debug"/>
<logger name="io.piveau" level="${PIVEAU_LOG_LEVEL:-INFO}"/>
<logger name="pipe" level="${PIVEAU_PIPE_LOG_LEVEL:-INFO}" additivity="false">
<appender-ref ref="${pipeAppender}" />
</logger>
<root level="info">
<appender-ref ref="STDOUT" />
</root>
</configuration>