Implement basic conversion support (doc + docx -> pdf)

wip
Jonas Letzbor 2022-09-23 11:58:53 +02:00
parent 91f2e1abcb
commit bfc1ad40a5
10 changed files with 398 additions and 20 deletions

1
.gitignore vendored
View File

@ -23,6 +23,7 @@ go.work
# Locally used configuration file # Locally used configuration file
/config.yaml /config.yaml
/ncConverter.json
# Vite build file # Vite build file
/.vite /.vite

View File

@ -6,6 +6,7 @@ import (
"time" "time"
"rpjosh.de/ncDocConverter/internal/models" "rpjosh.de/ncDocConverter/internal/models"
"rpjosh.de/ncDocConverter/internal/ncworker"
"rpjosh.de/ncDocConverter/pkg/logger" "rpjosh.de/ncDocConverter/pkg/logger"
) )
@ -50,6 +51,16 @@ func main() {
WriteTimeout: 10 * time.Second, WriteTimeout: 10 * time.Second,
} }
ncConvertUsers, err := models.ParseConvertUsers("./ncConverter.json")
if err != nil {
logger.Error("Unable to parse the file %s: %s", "dd", err)
}
ncworker.NewScheduler(ncConvertUsers)
if 1 == 1 {
return
}
logger.Info("Server started on %s", config.Server.Address) logger.Info("Server started on %s", config.Server.Address)
var errw error var errw error
if config.Server.Certificate == "" { if config.Server.Certificate == "" {

1
go.mod
View File

@ -11,3 +11,4 @@ require (
) )
// https://zhwt.github.io/yaml-to-go/ // https://zhwt.github.io/yaml-to-go/
// https://www.onlinetool.io/xmltogo/

View File

@ -12,6 +12,7 @@ type User struct {
NextcloudBaseUrl string`json:"nextcloudUrl"` NextcloudBaseUrl string`json:"nextcloudUrl"`
Username string`json:"username"` Username string`json:"username"`
Password string`json:"password"` Password string`json:"password"`
ConvertJobs []ConvertJob`json:"jobs"`
} }
type ConvertJob struct { type ConvertJob struct {
@ -23,14 +24,14 @@ type ConvertJob struct {
Executions []string`json:"execution"` Executions []string`json:"execution"`
} }
type NcConverter struct { type NcConvertUsers struct {
Users []User`json:"users"` Users []User`json:"users"`
} }
// Parses the given file to the in memory struct // Parses the given file to the in memory struct
func ParseUsers(filePath string) (*NcConverter, error) { func ParseConvertUsers(filePath string) (*NcConvertUsers, error) {
file, err := os.OpenFile(filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) file, err := os.OpenFile(filePath, os.O_APPEND|os.O_CREATE, 0644)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to open the file '%s': %s", filePath, err) return nil, fmt.Errorf("failed to open the file '%s': %s", filePath, err)
} }
@ -41,7 +42,7 @@ func ParseUsers(filePath string) (*NcConverter, error) {
return nil, fmt.Errorf("failed to parse 'ncConverter.json': %s", err) return nil, fmt.Errorf("failed to parse 'ncConverter.json': %s", err)
} }
var conv NcConverter var conv NcConvertUsers
json.Unmarshal(byteValue, &conv) json.Unmarshal(byteValue, &conv)

View File

@ -27,7 +27,7 @@ type Logging struct {
} }
// Parses the given configuration file (.yaml file) to an WebConfiguration // Parses the given configuration file (.yaml file) to an WebConfiguration
func ParseConfig(webConfig *WebConfig, file string) (*WebConfig, error) { func ParseWebConfig(webConfig *WebConfig, file string) (*WebConfig, error) {
if file == "" { if file == "" {
return webConfig, nil return webConfig, nil
} }
@ -68,7 +68,7 @@ func SetConfig() (*WebConfig, error) {
} }
} }
webConfig := getDefaultConfig() webConfig := getDefaultConfig()
webConfig, err := ParseConfig(webConfig, configPath) webConfig, err := ParseWebConfig(webConfig, configPath)
if err != nil { if err != nil {
logger.Error("Unable to parse the configuration file '%s': %s", configPath, err) logger.Error("Unable to parse the configuration file '%s': %s", configPath, err)
webConfig = getDefaultConfig() webConfig = getDefaultConfig()

View File

@ -0,0 +1,30 @@
package ncworker
import (
"rpjosh.de/ncDocConverter/internal/models"
)
// NcConvertScheduler holds the user configuration whose convert jobs are run
// by ScheduleExecutions.
type NcConvertScheduler struct {
	// users is the configuration that was handed to NewScheduler.
	users *models.NcConvertUsers
}
// NewScheduler creates a scheduler for the given users and runs
// ScheduleExecutions on it once before handing it back to the caller.
func NewScheduler(users *models.NcConvertUsers) *NcConvertScheduler {
	s := &NcConvertScheduler{users: users}
	s.ScheduleExecutions()
	return s
}
// ScheduleExecutions runs every convert job of every configured user, one
// after another, on the calling goroutine.
//
// It does nothing when the scheduler was created without a user configuration
// (a nil value passed to NewScheduler), instead of dereferencing nil.
func (scheduler NcConvertScheduler) ScheduleExecutions() {
	if scheduler.users == nil {
		return
	}

	users := scheduler.users.Users
	for u := range users {
		user := &users[u]
		for j := range user.ConvertJobs {
			// Hand out pointers to the slice elements rather than to the loop
			// variables: before Go 1.22 a range variable is reused by every
			// iteration, so its address would alias all jobs and users.
			NewJob(&user.ConvertJobs[j], user).ExecuteJob()
		}
	}
}

View File

@ -1,9 +1,307 @@
package ncworker package ncworker
type NcConverter struct { import (
NextcloudBaseUrl string`json:"nextcloudUrl"` "bytes"
Username string`json:"username"` "encoding/xml"
App "fmt"
SourceDir string`json:"users"` "io"
DestinationDir string`json:"users"` "net/http"
"path/filepath"
"strconv"
"strings"
"text/template"
"time"
"rpjosh.de/ncDocConverter/internal/models"
"rpjosh.de/ncDocConverter/pkg/logger"
"rpjosh.de/ncDocConverter/web"
)
// convertJob couples a single convert job with the user it belongs to.
type convertJob struct {
	// job is the job definition (source and destination directory).
	job *models.ConvertJob
	// user provides the Nextcloud base URL and the credentials for all requests.
	user *models.User
}
// searchResult is the decoded "multistatus" document that is returned for a
// WebDAV SEARCH request.
type searchResult struct {
	XMLName  xml.Name         `xml:"multistatus"`
	Text     string           `xml:",chardata"`
	D        string           `xml:"d,attr"`
	S        string           `xml:"s,attr"`
	Oc       string           `xml:"oc,attr"`
	Nc       string           `xml:"nc,attr"`
	Response []searchResponse `xml:"response"`
}

// searchResponse is a single "response" element, i.e. one found file.
type searchResponse struct {
	Text     string         `xml:",chardata"`
	Href     string         `xml:"href"`
	Propstat searchPropstat `xml:"propstat"`
}

// searchPropstat is the "propstat" element of a response.
type searchPropstat struct {
	Text   string     `xml:",chardata"`
	Prop   searchProp `xml:"prop"`
	Status string     `xml:"status"`
}

// searchProp contains the file properties that were requested by the search.
type searchProp struct {
	Text            string `xml:",chardata"`
	Getcontenttype  string `xml:"getcontenttype"`
	Getlastmodified string `xml:"getlastmodified"`
	Size            string `xml:"size"`
	Fileid          int    `xml:"fileid"`
}
// ncFiles describes a single file that was found by a search.
type ncFiles struct {
	// extension is the file extension as returned by filepath.Ext; path is
	// the part of the href behind the WebDAV files prefix of the user.
	extension, path string
	// lastModified is the parsed "getlastmodified" property.
	lastModified time.Time
	// contentType is the "getcontenttype" property.
	contentType string
	// size is the parsed "size" property; fileid is the "fileid" property.
	size, fileid int
}
// searchTemplateData carries the values that are filled into the search
// request template.
type searchTemplateData struct {
	// Username owns the searched files; Directory is the directory to search in.
	Username, Directory string
	// ContentType lists the content types a file may have to be returned.
	ContentType []string
}
// NewJob bundles the given job definition and its owning user into a
// convertJob that can be run with ExecuteJob.
func NewJob(job *models.ConvertJob, user *models.User) *convertJob {
	return &convertJob{job: job, user: user}
}
// ExecuteJob synchronises the destination directory with the source directory.
//
// It lists the Word documents (doc and docx) below the source directory and
// the PDF files below the destination directory. Afterwards it
//   - converts every document that has no PDF yet,
//   - converts every document again that is newer than its PDF, and
//   - deletes every PDF that has no matching document anymore.
//
// Documents and PDFs are matched by their path relative to the source or
// destination directory, without the file extension.
func (job *convertJob) ExecuteJob() {
	source := job.searchInDirectory(
		job.job.SourceDir,
		[]string{
			"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
			"application/msword",
		},
	)
	destination := job.searchInDirectory(
		job.job.DestinationDir,
		[]string{
			"application/pdf",
		},
	)
	if source == nil || destination == nil {
		// Without both listings the directories cannot be compared; going on
		// would convert or delete files based on incomplete data.
		logger.Error("Skipping the job: unable to list '%s' or '%s'", job.job.SourceDir, job.job.DestinationDir)
		return
	}

	sourceMap := job.collectFiles(source, job.job.SourceDir)
	destinationMap := job.collectFiles(destination, job.job.DestinationDir)

	for name, src := range sourceMap {
		dest, exists := destinationMap[name]
		if !exists {
			job.convertFile(src.path, src.fileid, job.getDestinationDir(src.path))
			continue
		}

		// only the timestamp decides whether the PDF is outdated
		if dest.lastModified.Before(src.lastModified) {
			job.convertFile(src.path, src.fileid, dest.path)
		}
		// the PDF still has a source file and must survive the cleanup below
		delete(destinationMap, name)
	}

	// delete the files which are not available anymore
	for _, dest := range destinationMap {
		job.deleteFile(dest.path)
	}
}

// collectFiles indexes the entries of a search result by their path relative
// to baseDir, without the file extension. Entries whose href is too short to
// contain the WebDAV files prefix and baseDir are logged and skipped.
func (job *convertJob) collectFiles(result *searchResult, baseDir string) map[string]ncFiles {
	// NOTE(review): assumes every href starts with this prefix (and is not
	// encoded differently) — only its length is used to cut it off. Confirm.
	prefixLen := len("/remote.php/dav/files/" + job.user.Username + "/")

	files := make(map[string]ncFiles, len(result.Response))
	for i := range result.Response {
		entry := &result.Response[i]
		if len(entry.Href) < prefixLen {
			logger.Error("Unexpected href '%s' in the search result", entry.Href)
			continue
		}

		path := entry.Href[prefixLen:]
		extension := filepath.Ext(path)
		stem := path[:len(path)-len(extension)]
		if len(stem) < len(baseDir) {
			logger.Error("Unexpected href '%s' in the search result", entry.Href)
			continue
		}
		name := stem[len(baseDir):]

		// Time format: Fri, 23 Sep 2022 05:46:31 GMT
		// A value that cannot be parsed is logged and the zero value is kept.
		modified, err := time.Parse("Mon, 02 Jan 2006 15:04:05 GMT", entry.Propstat.Prop.Getlastmodified)
		if err != nil {
			logger.Error("%s", err)
		}
		size, err := strconv.Atoi(entry.Propstat.Prop.Size)
		if err != nil {
			logger.Error("%s", err)
		}

		files[name] = ncFiles{
			extension:    extension,
			path:         path,
			lastModified: modified,
			size:         size,
			contentType:  entry.Propstat.Prop.Getcontenttype,
			fileid:       entry.Propstat.Prop.Fileid,
		}
	}
	return files
}
// getDestinationDir returns the path of the PDF that belongs to the given
// source file: the source directory prefix is replaced by the destination
// directory and the file extension by ".pdf".
//
// A sourceFile that does not start with the source directory is left
// unshortened instead of being sliced by length, which could panic.
func (job *convertJob) getDestinationDir(sourceFile string) string {
	relative := strings.TrimPrefix(sourceFile, job.job.SourceDir)
	name := strings.TrimSuffix(relative, filepath.Ext(relative))
	return job.job.DestinationDir + name + ".pdf"
}
// createFoldersRecursively creates all parent folders of destinationFile (a
// path relative to the WebDAV files root of the user) with MKCOL requests.
//
// The first request that fails or is answered with an unexpected status is
// logged and ends the function, because the folders below it cannot be
// created either.
func (job *convertJob) createFoldersRecursively(destinationFile string) {
	logger.Debug("Creating directory for file '%s'", destinationFile)

	segments := strings.Split(destinationFile, "/")
	baseURL := job.user.NextcloudBaseUrl + "/remote.php/dav/files/" + job.user.Username + "/"
	client := http.Client{Timeout: 5 * time.Second}

	// WebDAV doesn't have a function to create directories recursively, so
	// every level is created on its own, starting at the top. The last
	// segment is the file name itself and is left out.
	folderTree := ""
	for _, folder := range segments[:len(segments)-1] {
		folderTree += folder + "/"

		req, err := http.NewRequest("MKCOL", baseURL+folderTree, nil)
		if err != nil {
			logger.Error("%s", err)
			return
		}
		req.SetBasicAuth(job.user.Username, job.user.Password)

		res, err := client.Do(req)
		if err != nil {
			logger.Error("%s", err)
			return
		}
		// The body is not needed; close it right away so it is not leaked.
		res.Body.Close()

		// status code 201 (created) or 405 (already existing)
		if res.StatusCode != http.StatusCreated && res.StatusCode != http.StatusMethodNotAllowed {
			logger.Error("Failed to create folder %s (%d)", folderTree, res.StatusCode)
			return
		}
	}
}
// convertFile converts the file with the given file id to PDF and stores the
// result as destinationFile.
//
// The PDF is requested from the "downloadas" endpoint of the OnlyOffice app
// and the response body is streamed into a PUT request against the WebDAV
// path of destinationFile. Missing parent folders are created first.
// sourceFile is only used for log messages. Every failure is logged and ends
// the conversion of this file.
func (job *convertJob) convertFile(sourceFile string, sourceid int, destinationFile string) {
	logger.Debug("Trying to convert %s (%d) to %s", sourceFile, sourceid, destinationFile)
	job.createFoldersRecursively(destinationFile)

	client := http.Client{Timeout: 10 * time.Second}

	downloadReq, err := http.NewRequest(http.MethodGet, job.user.NextcloudBaseUrl+"/apps/onlyoffice/downloadas", nil)
	if err != nil {
		logger.Error("%s", err)
		return
	}
	downloadReq.SetBasicAuth(job.user.Username, job.user.Password)

	query := downloadReq.URL.Query()
	query.Add("fileId", fmt.Sprint(sourceid))
	query.Add("toExtension", "pdf")
	downloadReq.URL.RawQuery = query.Encode()

	downloadRes, err := client.Do(downloadReq)
	if err != nil {
		logger.Error("%s", err)
		return
	}
	defer downloadRes.Body.Close()

	// Anything but 200 means that the body is not the converted document;
	// uploading it would store an error page as PDF.
	if downloadRes.StatusCode != http.StatusOK {
		logger.Error("Failed to convert file %s (%d)", sourceFile, downloadRes.StatusCode)
		return
	}

	uploadReq, err := http.NewRequest(
		http.MethodPut,
		job.user.NextcloudBaseUrl+"/remote.php/dav/files/"+job.user.Username+"/"+destinationFile,
		downloadRes.Body,
	)
	if err != nil {
		logger.Error("%s", err)
		return
	}
	uploadReq.SetBasicAuth(job.user.Username, job.user.Password)
	uploadReq.Header.Set("Content-Type", "application/binary")

	uploadRes, err := client.Do(uploadReq)
	if err != nil {
		logger.Error("%s", err)
		return
	}
	defer uploadRes.Body.Close()

	// status code 201 (created) or 204 (existing file replaced)
	if uploadRes.StatusCode != 204 && uploadRes.StatusCode != 201 {
		logger.Error("Failed to create file %s (#%d)", destinationFile, uploadRes.StatusCode)
	}
}
// deleteFile deletes the file with the given path (relative to the WebDAV
// files root of the user) with a DELETE request. A failed request or a
// status code other than 204 is logged.
func (job *convertJob) deleteFile(filePath string) {
	client := http.Client{Timeout: 5 * time.Second}

	req, err := http.NewRequest(http.MethodDelete, job.user.NextcloudBaseUrl+"/remote.php/dav/files/"+job.user.Username+"/"+filePath, nil)
	if err != nil {
		logger.Error("%s", err)
		return
	}
	req.SetBasicAuth(job.user.Username, job.user.Password)

	res, err := client.Do(req)
	if err != nil {
		// there is no response to inspect when the request itself failed
		logger.Error("%s", err)
		return
	}
	defer res.Body.Close()

	if res.StatusCode != 204 {
		logger.Error("Failed to delete file %s (%d)", filePath, res.StatusCode)
	}
}
// Searches all doc files in the source directory
func (job *convertJob) searchInDirectory(directory string, contentType []string) *searchResult {
client := http.Client{Timeout: 5 * time.Second}
template, err := template.ParseFS(web.ApiTemplateFiles, "apitemplate/ncsearch.tmpl.xml")
if err != nil {
logger.Error("%s", err)
}
var buf bytes.Buffer
templateData := searchTemplateData{
Username: job.user.Username,
Directory: directory,
ContentType: contentType,
}
if err = template.Execute(&buf, templateData); err != nil {
logger.Error("%s", err)
}
// Status code 207
req, err := http.NewRequest("SEARCH", job.user.NextcloudBaseUrl + "/remote.php/dav/", &buf)
if err != nil {
logger.Error("%s", err)
}
req.SetBasicAuth(job.user.Username, job.user.Password)
req.Header.Set("Content-Type", "application/xml")
res, err := client.Do(req)
if err != nil {
logger.Error("%s", err)
}
defer res.Body.Close()
resBody, err := io.ReadAll(res.Body)
if err != nil {
logger.Error("%s", err)
}
fmt.Print(res.StatusCode)
var result searchResult
if err = xml.Unmarshal(resBody, &result); err != nil {
logger.Error("%s", err)
}
return &result
} }

View File

@ -1,7 +0,0 @@
package ncworker
func scheduleExecutions() {
}

View File

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<d:searchrequest xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
<d:basicsearch>
<d:select>
<d:prop>
<d:getcontenttype/>
<d:getlastmodified/>
<oc:size/>
<oc:fileid/>
</d:prop>
</d:select>
<d:from>
<d:scope>
<d:href>/files/{{.Username}}/{{.Directory}}</d:href>
<d:depth></d:depth>
</d:scope>
</d:from>
<d:where>
<d:and>
<d:or>
{{range .ContentType}}
<d:eq>
<d:prop>
<d:getcontenttype/>
</d:prop>
<d:literal>{{ . }}</d:literal>
</d:eq>
{{end}}
</d:or>
<d:gt>
<d:prop>
<oc:size/>
</d:prop>
<d:literal>100</d:literal>
</d:gt>
</d:and>
</d:where>
<d:orderby/>
</d:basicsearch>
</d:searchrequest>

View File

@ -12,3 +12,6 @@ var DevelopeFiles embed.FS
//go:embed "template" //go:embed "template"
var TemplateFiles embed.FS var TemplateFiles embed.FS
//go:embed "apitemplate"
var ApiTemplateFiles embed.FS