From 3843ebab9f51b4defe759d720754ed4443c65cf0 Mon Sep 17 00:00:00 2001 From: RPJosh Date: Mon, 2 Jan 2023 11:44:59 +0100 Subject: [PATCH] Improve office conversion --- internal/ncworker/bookStack.go | 25 +---- internal/ncworker/office.go | 163 +++++++++++++++++--------------- internal/nextcloud/nextcloud.go | 62 +++++++++++- scripts/run.cmd | 3 +- 4 files changed, 153 insertions(+), 100 deletions(-) diff --git a/internal/ncworker/bookStack.go b/internal/ncworker/bookStack.go index d090491..5869838 100644 --- a/internal/ncworker/bookStack.go +++ b/internal/ncworker/bookStack.go @@ -7,8 +7,6 @@ import ( "fmt" "io" "net/http" - "net/url" - "path/filepath" "regexp" "strings" "sync" @@ -108,21 +106,8 @@ func (job *BsJob) ExecuteJob() { } // Make a map with path as index - destinationMap := make(map[string]ncFiles) - - preCount := len("/remote.php/dav/files/" + job.ncUser.Username + "/") - for _, file := range destination.Response { - href, _ := url.QueryUnescape(file.Href) - path := href[preCount:] - var extension = filepath.Ext(path) - var name = path[0 : len(path)-len(extension)][len(job.job.DestinationDir):] - - destinationMap[name] = ncFiles{ - extension: extension, - path: path, - lastModified: file.GetLastModified(), - } - } + prefix := "/remote.php/dav/files/" + job.ncUser.Username + "/" + destinationMap := nextcloud.ParseSearchResult(destination, prefix, job.job.DestinationDir) // Check for cache job.cache() @@ -163,7 +148,7 @@ func (job *BsJob) ExecuteJob() { // check if it has to be converted again (updated) or for the first time des, exists := destinationMap[i] - if (!exists || b.lastModified.After(des.lastModified)) && !b.ignore { + if (!exists || b.lastModified.After(des.LastModified)) && !b.ignore { wg.Add(1) convertCount++ go func(book book, path string) { @@ -187,7 +172,7 @@ func (job *BsJob) ExecuteJob() { // check if it has to be converted again (updated) or for the first time des, exists := destinationMap[b.Name] - if !b.converted && !b.ignore && 
(!exists || b.lastModified.After(des.lastModified)) { + if !b.converted && !b.ignore && (!exists || b.lastModified.After(des.LastModified)) { wg.Add(1) convertCount++ go func(book book, path string) { @@ -202,7 +187,7 @@ func (job *BsJob) ExecuteJob() { // Delete the files which are not available anymore for _, dest := range destinationMap { - err := nextcloud.DeleteFile(job.ncUser, dest.path) + err := nextcloud.DeleteFile(job.ncUser, dest.Path) if err != nil { logger.Error(utils.FirstCharToUppercase(err.Error())) } diff --git a/internal/ncworker/office.go b/internal/ncworker/office.go index c6aabec..822b7e1 100644 --- a/internal/ncworker/office.go +++ b/internal/ncworker/office.go @@ -2,10 +2,11 @@ package ncworker import ( "fmt" + "io" "net/http" - "net/url" "path/filepath" - "strconv" + "strings" + "sync" "time" "rpjosh.de/ncDocConverter/internal/models" @@ -19,13 +20,9 @@ type convertJob struct { ncUser *models.NextcloudUser } -type ncFiles struct { - extension string - path string - lastModified time.Time - contentType string - size int - fileid int +type convertQueu struct { + source nextcloud.NcFile + destination string } func NewNcJob(job *models.NcConvertJob, ncUser *models.NextcloudUser) *convertJob { @@ -40,7 +37,7 @@ func NewNcJob(job *models.NcConvertJob, ncUser *models.NextcloudUser) *convertJo func (job *convertJob) ExecuteJob() { // Get existing directory contents - source, err := nextcloud.SearchInDirectory( + sourceFolder, err := nextcloud.SearchInDirectory( job.ncUser, job.job.SourceDir, []string{ @@ -53,7 +50,7 @@ func (job *convertJob) ExecuteJob() { return } - destination, err := nextcloud.SearchInDirectory( + destinationFolder, err := nextcloud.SearchInDirectory( job.ncUser, job.job.DestinationDir, []string{ @@ -65,83 +62,89 @@ func (job *convertJob) ExecuteJob() { return } - preCount := len("/remote.php/dav/files/" + job.ncUser.Username + "/") - // Store the files in a map - sourceMap := make(map[string]ncFiles) - destinationMap := 
make(map[string]ncFiles) + // Store all files in a map + prefix := "/remote.php/dav/files/" + job.ncUser.Username + "/" + sourceMap := nextcloud.ParseSearchResult(sourceFolder, prefix, job.job.SourceDir) + destinationMap := nextcloud.ParseSearchResult(destinationFolder, prefix, job.job.DestinationDir) - for _, file := range source.Response { - href, _ := url.QueryUnescape(file.Href) - path := href[preCount:] - var extension = filepath.Ext(path) - var name = path[0 : len(path)-len(extension)][len(job.job.SourceDir):] - time := file.GetLastModified() - size, err := strconv.Atoi(file.Propstat.Prop.Size) - if err != nil { - logger.Error("%s", err) - } - sourceMap[name] = ncFiles{ - extension: extension, - path: path, - lastModified: time, - size: size, - contentType: file.Propstat.Prop.Getcontenttype, - fileid: file.Propstat.Prop.Fileid, - } - } + // check which files should be converted + var filesToConvert []convertQueu + var directorys []string - for _, file := range destination.Response { - href, _ := url.QueryUnescape(file.Href) - path := href[preCount:] - var extension = filepath.Ext(path) - var name = path[0 : len(path)-len(extension)][len(job.job.DestinationDir):] - - time, err := time.Parse("Mon, 02 Jan 2006 15:04:05 GMT", file.Propstat.Prop.Getlastmodified) - if err != nil { - logger.Error("%s", err) - } - size, err := strconv.Atoi(file.Propstat.Prop.Size) - if err != nil { - logger.Error("%s", err) - } - destinationMap[name] = ncFiles{ - extension: extension, - path: path, - lastModified: time, - size: size, - contentType: file.Propstat.Prop.Getcontenttype, - fileid: file.Propstat.Prop.Fileid, - } - } - - convertCount := 0 for index, source := range sourceMap { - // check if the file exists in the destination map + // Check if the file exists in the destination map if dest, exists := destinationMap[index]; exists { - // compare timestamp and size - if dest.lastModified.Before(source.lastModified) { - job.convertFile(source.path, source.fileid, dest.path) - 
convertCount++ + // Compare timestamp and size + if dest.LastModified.Before(source.LastModified) { + filesToConvert = append(filesToConvert, convertQueu{source: source, destination: dest.Path}) } delete(destinationMap, index) } else { - job.convertFile( - source.path, source.fileid, job.getDestinationDir(source.path), - ) - convertCount++ + // the destination directory may not exist yet -> remember it for creation + destinationDir := job.getDestinationDir(source.Path) + appendIfNotExists(&directorys, destinationDir[0:strings.LastIndex(destinationDir, "/")+1]) + + filesToConvert = append(filesToConvert, convertQueu{source: source, destination: destinationDir}) + delete(destinationMap, index) } } + var wg sync.WaitGroup + // Delete the files which are not available anymore + wg.Add(len(destinationMap)) for _, dest := range destinationMap { - err := nextcloud.DeleteFile(job.ncUser, dest.path) - if err != nil { - logger.Error(utils.FirstCharToUppercase(err.Error())) + go func(file nextcloud.NcFile) { + err := nextcloud.DeleteFile(job.ncUser, file.Path) + if err != nil { + logger.Error(utils.FirstCharToUppercase(err.Error())) + } + wg.Done() + }(dest) + } + wg.Wait() + + // Create required directorys + wg.Add(len(directorys)) + for _, dest := range directorys { + go func(path string) { + nextcloud.CreateFoldersRecursively(job.ncUser, path) + wg.Done() + }(dest) + } + wg.Wait() + + // Convert the files + wg.Add(len(filesToConvert)) + for _, file := range filesToConvert { + logger.Info("Path: %s", file.source.Path) + go func(cvt convertQueu) { + job.convertFile(cvt.source.Path, cvt.source.Fileid, cvt.destination) + wg.Done() + }(file) + } + wg.Wait() + + logger.Info("Finished Nextcloud job \"%s\": %d documents converted", job.job.JobName, len(filesToConvert)) +} + +// appendIfNotExists adds directory to dirs unless an existing entry already covers it; +// an existing entry that is a prefix (parent) of directory is replaced by the longer path. +func appendIfNotExists(dirs *[]string, directory string) { + directoryLength := len(directory) + for i, currentDir := range 
*dirs { + currentLength := len(currentDir) + + // an existing entry is a parent of directory: keep only the deeper path + if directoryLength > currentLength && directory[0:currentLength] == currentDir { + (*dirs)[i] = directory + return + } else if directoryLength <= currentLength && currentDir[0:directoryLength] == directory { + return } } - - logger.Info("Finished Nextcloud job \"%s\": %d documents converted", job.job.JobName, convertCount) + *dirs = append(*dirs, directory) } func (job *convertJob) getDestinationDir(sourceFile string) string { @@ -154,9 +157,7 @@ func (job *convertJob) getDestinationDir(sourceFile string) string { // Converts the source file to the destination file utilizing the onlyoffice convert api func (job *convertJob) convertFile(sourceFile string, sourceid int, destinationFile string) { - logger.Debug("Trying to convert %s (%d) to %s", sourceFile, sourceid, destinationFile) - - nextcloud.CreateFoldersRecursively(job.ncUser, destinationFile) + logger.Debug("Converting %s (%d) to %s", sourceFile, sourceid, destinationFile) client := http.Client{Timeout: 10 * time.Second} req, err := http.NewRequest(http.MethodGet, job.ncUser.NextcloudBaseUrl+"/apps/onlyoffice/downloadas", nil) @@ -172,11 +173,17 @@ func (job *convertJob) convertFile(sourceFile string, sourceid int, destinationF res, err := client.Do(req) if err != nil { - logger.Error("%s", err) + logger.Error("Failed to access the convert api: %s", err) + return } - // Status Code 200 defer res.Body.Close() + if res.StatusCode != 200 { + body, _ := io.ReadAll(res.Body) + logger.Error("Failed to access the convert api (#%d). 
Do you have OnlyOffice installed?: %s", res.StatusCode, body) + return + } + uploadClient := http.Client{Timeout: 10 * time.Second} uploadReq, err := http.NewRequest(http.MethodPut, job.ncUser.NextcloudBaseUrl+"/remote.php/dav/files/"+job.ncUser.Username+"/"+destinationFile, res.Body) diff --git a/internal/nextcloud/nextcloud.go b/internal/nextcloud/nextcloud.go index 7baa7d6..5c6f6f4 100644 --- a/internal/nextcloud/nextcloud.go +++ b/internal/nextcloud/nextcloud.go @@ -6,6 +6,9 @@ import ( "fmt" "io" "net/http" + "net/url" + "path/filepath" + "strconv" "strings" "text/template" "time" @@ -15,6 +18,22 @@ import ( "rpjosh.de/ncDocConverter/web" ) +// The internal representation of a nextcloud file +type NcFile struct { + // File extension: txt + Extension string + // Relative path of the file to the nextcloud root: /folder/file.txt + Path string + LastModified time.Time + ContentType string + // Size in Bytes + Size int + // The unique file ID of the nextcloud server + Fileid int + // The Webdav URL for file reference + WebdavURL string +} + type searchTemplateData struct { Username string Directory string @@ -104,6 +123,13 @@ func SearchInDirectory(ncUser *models.NextcloudUser, directory string, contentTy return nil, err } + // Create folder if not existing + if res.StatusCode == 404 { + logger.Info("Creating directory '%s' because it does not exist", "/"+directory) + CreateFoldersRecursively(ncUser, "/"+directory+"notExisting.pdf") + return &searchResult{}, nil + } + if res.StatusCode != 207 { return nil, fmt.Errorf("status code %d: %s", res.StatusCode, resBody) } @@ -116,6 +142,40 @@ func SearchInDirectory(ncUser *models.NextcloudUser, directory string, contentTy return &result, nil } +// Parses the response from the given search format to an NcFile. +// A map with the relative path based on the source Directory ("someFolder/file.txt") +// and the mathing NcFile will be returned. Therefore, also the source Directory has to be given. 
+// +// To determine the path without the prefix "/remote.php/dav/user/" it has to be given. +func ParseSearchResult(result *searchResult, prefix string, sourceDir string) map[string]NcFile { + preCount := len(prefix) + rtc := make(map[string]NcFile) + + for _, file := range result.Response { + href, _ := url.QueryUnescape(file.Href) + path := href[preCount:] + var extension = filepath.Ext(path) + var name = path[0 : len(path)-len(extension)][len(sourceDir):] + time := file.GetLastModified() + size, err := strconv.Atoi(file.Propstat.Prop.Size) + if err != nil { + logger.Error("Failed to parse the file size '%s' to an integer: %s", file.Propstat.Prop.Size, err) + continue + } + rtc[name] = NcFile{ + Extension: extension, + Path: path, + LastModified: time, + Size: size, + ContentType: file.Propstat.Prop.Getcontenttype, + Fileid: file.Propstat.Prop.Fileid, + WebdavURL: file.Href, + } + } + + return rtc +} + // Delets a file with the given path. // The path has to start at the root level: Ebook/myFolder/file.txt func DeleteFile(ncUser *models.NextcloudUser, filePath string) error { @@ -154,7 +214,7 @@ func CreateFoldersRecursively(ncUser *models.NextcloudUser, destinationFile stri } if res.StatusCode != 201 && res.StatusCode != 405 { - logger.Error("Failed to create directorys") + logger.Error("Failed to create directory '%s'", folderTree) } } } diff --git a/scripts/run.cmd b/scripts/run.cmd index f976424..7ddf212 100644 --- a/scripts/run.cmd +++ b/scripts/run.cmd @@ -10,5 +10,6 @@ if "%~1"=="-FIXED_CTRL_C" ( GOTO :EOF ) +SET PATH=%PATH%;C:\Windows\System32 set GOTMPDIR=C:\MYCOMP -nodemon --delay 1s -e go,html --ignore web/app/ --exec go run ./cmd/ncDocConverth --signal SIGTERM \ No newline at end of file +nodemon --delay 1s -e go,html --ignore web/app/ --signal SIGKILL --exec go run ./cmd/ncDocConverth || exit 1 \ No newline at end of file