Improve office conversion

wip
Jonas Letzbor 2023-01-02 11:44:59 +01:00
parent 55ad1fbfee
commit 3843ebab9f
Signed by: RPJosh
GPG Key ID: 46D72F589702E55A
4 changed files with 153 additions and 100 deletions

View File

@ -7,8 +7,6 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"net/url"
"path/filepath"
"regexp" "regexp"
"strings" "strings"
"sync" "sync"
@ -108,21 +106,8 @@ func (job *BsJob) ExecuteJob() {
} }
// Make a map with path as index // Make a map with path as index
destinationMap := make(map[string]ncFiles) prefix := "/remote.php/dav/files/" + job.ncUser.Username + "/"
destinationMap := nextcloud.ParseSearchResult(destination, prefix, job.job.DestinationDir)
preCount := len("/remote.php/dav/files/" + job.ncUser.Username + "/")
for _, file := range destination.Response {
href, _ := url.QueryUnescape(file.Href)
path := href[preCount:]
var extension = filepath.Ext(path)
var name = path[0 : len(path)-len(extension)][len(job.job.DestinationDir):]
destinationMap[name] = ncFiles{
extension: extension,
path: path,
lastModified: file.GetLastModified(),
}
}
// Check for cache // Check for cache
job.cache() job.cache()
@ -163,7 +148,7 @@ func (job *BsJob) ExecuteJob() {
// check if it has to be converted again (updated) or for the first time // check if it has to be converted again (updated) or for the first time
des, exists := destinationMap[i] des, exists := destinationMap[i]
if (!exists || b.lastModified.After(des.lastModified)) && !b.ignore { if (!exists || b.lastModified.After(des.LastModified)) && !b.ignore {
wg.Add(1) wg.Add(1)
convertCount++ convertCount++
go func(book book, path string) { go func(book book, path string) {
@ -187,7 +172,7 @@ func (job *BsJob) ExecuteJob() {
// check if it has to be converted again (updated) or for the first time // check if it has to be converted again (updated) or for the first time
des, exists := destinationMap[b.Name] des, exists := destinationMap[b.Name]
if !b.converted && !b.ignore && (!exists || b.lastModified.After(des.lastModified)) { if !b.converted && !b.ignore && (!exists || b.lastModified.After(des.LastModified)) {
wg.Add(1) wg.Add(1)
convertCount++ convertCount++
go func(book book, path string) { go func(book book, path string) {
@ -202,7 +187,7 @@ func (job *BsJob) ExecuteJob() {
// Delete the files which are not available anymore // Delete the files which are not available anymore
for _, dest := range destinationMap { for _, dest := range destinationMap {
err := nextcloud.DeleteFile(job.ncUser, dest.path) err := nextcloud.DeleteFile(job.ncUser, dest.Path)
if err != nil { if err != nil {
logger.Error(utils.FirstCharToUppercase(err.Error())) logger.Error(utils.FirstCharToUppercase(err.Error()))
} }

View File

@ -2,10 +2,11 @@ package ncworker
import ( import (
"fmt" "fmt"
"io"
"net/http" "net/http"
"net/url"
"path/filepath" "path/filepath"
"strconv" "strings"
"sync"
"time" "time"
"rpjosh.de/ncDocConverter/internal/models" "rpjosh.de/ncDocConverter/internal/models"
@ -19,13 +20,9 @@ type convertJob struct {
ncUser *models.NextcloudUser ncUser *models.NextcloudUser
} }
type ncFiles struct { type convertQueu struct {
extension string source nextcloud.NcFile
path string destination string
lastModified time.Time
contentType string
size int
fileid int
} }
func NewNcJob(job *models.NcConvertJob, ncUser *models.NextcloudUser) *convertJob { func NewNcJob(job *models.NcConvertJob, ncUser *models.NextcloudUser) *convertJob {
@ -40,7 +37,7 @@ func NewNcJob(job *models.NcConvertJob, ncUser *models.NextcloudUser) *convertJo
func (job *convertJob) ExecuteJob() { func (job *convertJob) ExecuteJob() {
// Get existing directory contents // Get existing directory contents
source, err := nextcloud.SearchInDirectory( sourceFolder, err := nextcloud.SearchInDirectory(
job.ncUser, job.ncUser,
job.job.SourceDir, job.job.SourceDir,
[]string{ []string{
@ -53,7 +50,7 @@ func (job *convertJob) ExecuteJob() {
return return
} }
destination, err := nextcloud.SearchInDirectory( destinationFolder, err := nextcloud.SearchInDirectory(
job.ncUser, job.ncUser,
job.job.DestinationDir, job.job.DestinationDir,
[]string{ []string{
@ -65,83 +62,89 @@ func (job *convertJob) ExecuteJob() {
return return
} }
preCount := len("/remote.php/dav/files/" + job.ncUser.Username + "/") // Store all files in a map
// Store the files in a map prefix := "/remote.php/dav/files/" + job.ncUser.Username + "/"
sourceMap := make(map[string]ncFiles) sourceMap := nextcloud.ParseSearchResult(sourceFolder, prefix, job.job.SourceDir)
destinationMap := make(map[string]ncFiles) destinationMap := nextcloud.ParseSearchResult(destinationFolder, prefix, job.job.DestinationDir)
for _, file := range source.Response { // check which files should be converted
href, _ := url.QueryUnescape(file.Href) var filesToConvert []convertQueu
path := href[preCount:] var directorys []string
var extension = filepath.Ext(path)
var name = path[0 : len(path)-len(extension)][len(job.job.SourceDir):]
time := file.GetLastModified()
size, err := strconv.Atoi(file.Propstat.Prop.Size)
if err != nil {
logger.Error("%s", err)
}
sourceMap[name] = ncFiles{
extension: extension,
path: path,
lastModified: time,
size: size,
contentType: file.Propstat.Prop.Getcontenttype,
fileid: file.Propstat.Prop.Fileid,
}
}
for _, file := range destination.Response {
href, _ := url.QueryUnescape(file.Href)
path := href[preCount:]
var extension = filepath.Ext(path)
var name = path[0 : len(path)-len(extension)][len(job.job.DestinationDir):]
time, err := time.Parse("Mon, 02 Jan 2006 15:04:05 GMT", file.Propstat.Prop.Getlastmodified)
if err != nil {
logger.Error("%s", err)
}
size, err := strconv.Atoi(file.Propstat.Prop.Size)
if err != nil {
logger.Error("%s", err)
}
destinationMap[name] = ncFiles{
extension: extension,
path: path,
lastModified: time,
size: size,
contentType: file.Propstat.Prop.Getcontenttype,
fileid: file.Propstat.Prop.Fileid,
}
}
convertCount := 0
for index, source := range sourceMap { for index, source := range sourceMap {
// check if the file exists in the destination map // Check if the file exists in the destination map
if dest, exists := destinationMap[index]; exists { if dest, exists := destinationMap[index]; exists {
// compare timestamp and size // Compare timestamp and size
if dest.lastModified.Before(source.lastModified) { if dest.LastModified.Before(source.LastModified) {
job.convertFile(source.path, source.fileid, dest.path) filesToConvert = append(filesToConvert, convertQueu{source: source, destination: dest.Path})
convertCount++
} }
delete(destinationMap, index) delete(destinationMap, index)
} else { } else {
job.convertFile( // the directory could not be existing -> check for existance
source.path, source.fileid, job.getDestinationDir(source.path), destinationDir := job.getDestinationDir(source.Path)
) appendIfNotExists(&directorys, destinationDir[0:strings.LastIndex(destinationDir, "/")+1])
convertCount++
filesToConvert = append(filesToConvert, convertQueu{source: source, destination: destinationDir})
delete(destinationMap, index) delete(destinationMap, index)
} }
} }
var wg sync.WaitGroup
// Delete the files which are not available anymore // Delete the files which are not available anymore
wg.Add(len(destinationMap))
for _, dest := range destinationMap { for _, dest := range destinationMap {
err := nextcloud.DeleteFile(job.ncUser, dest.path) go func(file *nextcloud.NcFile) {
if err != nil { err := nextcloud.DeleteFile(job.ncUser, dest.Path)
logger.Error(utils.FirstCharToUppercase(err.Error())) if err != nil {
logger.Error(utils.FirstCharToUppercase(err.Error()))
}
wg.Done()
}(&dest)
}
wg.Wait()
// Create required directorys
wg.Add(len(directorys))
for _, dest := range directorys {
go func(path string) {
nextcloud.CreateFoldersRecursively(job.ncUser, path)
wg.Done()
}(dest)
}
wg.Wait()
// Convert the files
wg.Add(len(filesToConvert))
for _, file := range filesToConvert {
logger.Info("Path: %s", file.source.Path)
go func(cvt convertQueu) {
job.convertFile(cvt.source.Path, cvt.source.Fileid, cvt.destination)
wg.Done()
}(file)
}
wg.Wait()
logger.Info("Finished Nextcloud job \"%s\": %d documents converted", job.job.JobName, len(filesToConvert))
}
// appendIfNotExists adds directory to dirs unless an existing entry already
// covers it.
//
// Two entries are treated as related when one is a string prefix of the other.
// Creating the longer path recursively also creates the shorter one, so only
// the longer path is kept:
//   - an existing entry that starts with directory (or equals it) already
//     covers directory, and dirs is left unchanged;
//   - an existing entry that is a strict prefix of directory is replaced by
//     directory in place.
//
// Only when no related entry exists is directory appended. The previous
// implementation used "continue" in both cases and therefore always appended,
// which produced duplicate entries.
func appendIfNotExists(dirs *[]string, directory string) {
	for i, currentDir := range *dirs {
		switch {
		case strings.HasPrefix(currentDir, directory):
			// directory is already covered by (or identical to) this entry
			return
		case strings.HasPrefix(directory, currentDir):
			// directory is deeper than this entry: keep the deeper path only
			(*dirs)[i] = directory
			return
		}
	}

	*dirs = append(*dirs, directory)
}
func (job *convertJob) getDestinationDir(sourceFile string) string { func (job *convertJob) getDestinationDir(sourceFile string) string {
@ -154,9 +157,7 @@ func (job *convertJob) getDestinationDir(sourceFile string) string {
// Converts the source file to the destination file utilizing the onlyoffice convert api // Converts the source file to the destination file utilizing the onlyoffice convert api
func (job *convertJob) convertFile(sourceFile string, sourceid int, destinationFile string) { func (job *convertJob) convertFile(sourceFile string, sourceid int, destinationFile string) {
logger.Debug("Trying to convert %s (%d) to %s", sourceFile, sourceid, destinationFile) logger.Debug("Converting %s (%d) to %s", sourceFile, sourceid, destinationFile)
nextcloud.CreateFoldersRecursively(job.ncUser, destinationFile)
client := http.Client{Timeout: 10 * time.Second} client := http.Client{Timeout: 10 * time.Second}
req, err := http.NewRequest(http.MethodGet, job.ncUser.NextcloudBaseUrl+"/apps/onlyoffice/downloadas", nil) req, err := http.NewRequest(http.MethodGet, job.ncUser.NextcloudBaseUrl+"/apps/onlyoffice/downloadas", nil)
@ -172,11 +173,17 @@ func (job *convertJob) convertFile(sourceFile string, sourceid int, destinationF
res, err := client.Do(req) res, err := client.Do(req)
if err != nil { if err != nil {
logger.Error("%s", err) logger.Error("Failed to access the convert api: %s", err)
return
} }
// Status Code 200
defer res.Body.Close() defer res.Body.Close()
if res.StatusCode != 200 {
body, _ := io.ReadAll(res.Body)
logger.Error("Failed to access the convert api (#%d). Do you have OnlyOffice installed?: %s", res.StatusCode, body)
return
}
uploadClient := http.Client{Timeout: 10 * time.Second} uploadClient := http.Client{Timeout: 10 * time.Second}
uploadReq, err := http.NewRequest(http.MethodPut, job.ncUser.NextcloudBaseUrl+"/remote.php/dav/files/"+job.ncUser.Username+"/"+destinationFile, res.Body) uploadReq, err := http.NewRequest(http.MethodPut, job.ncUser.NextcloudBaseUrl+"/remote.php/dav/files/"+job.ncUser.Username+"/"+destinationFile, res.Body)

View File

@ -6,6 +6,9 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"net/url"
"path/filepath"
"strconv"
"strings" "strings"
"text/template" "text/template"
"time" "time"
@ -15,6 +18,22 @@ import (
"rpjosh.de/ncDocConverter/web" "rpjosh.de/ncDocConverter/web"
) )
// NcFile is the internal representation of a file stored on a nextcloud server.
// ParseSearchResult fills it from one entry of a search response.
type NcFile struct {
// File extension as returned by filepath.Ext, including the leading dot: .txt
Extension string
// Unescaped path of the file with the WebDAV prefix cut off: folder/file.txt
// NOTE(review): no leading slash as long as the given prefix ends with "/" — confirm for all callers
Path string
// Time of the last modification as reported by the search response
LastModified time.Time
// Content type property reported by the search response
ContentType string
// Size in Bytes
Size int
// The unique file ID of the nextcloud server
Fileid int
// The Webdav URL for file reference. This is the href of the search
// response exactly as received (it is not unescaped)
WebdavURL string
}
type searchTemplateData struct { type searchTemplateData struct {
Username string Username string
Directory string Directory string
@ -104,6 +123,13 @@ func SearchInDirectory(ncUser *models.NextcloudUser, directory string, contentTy
return nil, err return nil, err
} }
// Create folder if not existing
if res.StatusCode == 404 {
logger.Info("Creating directory '%s' because it does not exist", "/"+directory)
CreateFoldersRecursively(ncUser, "/"+directory+"notExisting.pdf")
return &searchResult{}, nil
}
if res.StatusCode != 207 { if res.StatusCode != 207 {
return nil, fmt.Errorf("status code %d: %s", res.StatusCode, resBody) return nil, fmt.Errorf("status code %d: %s", res.StatusCode, resBody)
} }
@ -116,6 +142,40 @@ func SearchInDirectory(ncUser *models.NextcloudUser, directory string, contentTy
return &result, nil return &result, nil
} }
// ParseSearchResult converts the entries of a search response into NcFile values.
//
// The returned map is keyed by the path of the file relative to sourceDir with
// the file extension removed: an entry "<sourceDir>someFolder/file.txt" is
// stored under "someFolder/file". sourceDir therefore has to be given in the
// same form in which it appears in the paths.
//
// prefix is the leading part of every href that does not belong to the file
// path (for example "/remote.php/dav/files/<username>/"). As before, it is cut
// off by length only; its content is not compared against the href.
//
// Entries that cannot be parsed are logged and skipped instead of causing a
// panic: an href that cannot be unescaped, an href shorter than prefix, a path
// shorter than sourceDir, or a size that is not an integer. A nil result yields
// an empty map.
func ParseSearchResult(result *searchResult, prefix string, sourceDir string) map[string]NcFile {
	rtc := make(map[string]NcFile)
	if result == nil {
		return rtc
	}

	preCount := len(prefix)
	for _, file := range result.Response {
		href, err := url.QueryUnescape(file.Href)
		if err != nil {
			logger.Error("Failed to unescape the href '%s': %s", file.Href, err)
			continue
		}
		if len(href) < preCount {
			logger.Error("The href '%s' is shorter than the expected prefix '%s'", href, prefix)
			continue
		}

		filePath := href[preCount:]
		extension := filepath.Ext(filePath)
		withoutExtension := filePath[:len(filePath)-len(extension)]
		if len(withoutExtension) < len(sourceDir) {
			logger.Error("The path '%s' is not located inside the directory '%s'", filePath, sourceDir)
			continue
		}
		name := withoutExtension[len(sourceDir):]

		size, err := strconv.Atoi(file.Propstat.Prop.Size)
		if err != nil {
			logger.Error("Failed to parse the file size '%s' to an integer: %s", file.Propstat.Prop.Size, err)
			continue
		}

		rtc[name] = NcFile{
			Extension:    extension,
			Path:         filePath,
			LastModified: file.GetLastModified(),
			Size:         size,
			ContentType:  file.Propstat.Prop.Getcontenttype,
			Fileid:       file.Propstat.Prop.Fileid,
			WebdavURL:    file.Href,
		}
	}

	return rtc
}
// Deletes a file with the given path. // Deletes a file with the given path.
// The path has to start at the root level: Ebook/myFolder/file.txt // The path has to start at the root level: Ebook/myFolder/file.txt
func DeleteFile(ncUser *models.NextcloudUser, filePath string) error { func DeleteFile(ncUser *models.NextcloudUser, filePath string) error {
@ -154,7 +214,7 @@ func CreateFoldersRecursively(ncUser *models.NextcloudUser, destinationFile stri
} }
if res.StatusCode != 201 && res.StatusCode != 405 { if res.StatusCode != 201 && res.StatusCode != 405 {
logger.Error("Failed to create directorys") logger.Error("Failed to create directory '%s'", folderTree)
} }
} }
} }

View File

@ -10,5 +10,6 @@ if "%~1"=="-FIXED_CTRL_C" (
GOTO :EOF GOTO :EOF
) )
SET PATH=%PATH%;C:\Windows\System32
set GOTMPDIR=C:\MYCOMP set GOTMPDIR=C:\MYCOMP
nodemon --delay 1s -e go,html --ignore web/app/ --exec go run ./cmd/ncDocConverth --signal SIGTERM nodemon --delay 1s -e go,html --ignore web/app/ --signal SIGKILL --exec go run ./cmd/ncDocConverth || exit 1