Improve office conversion

wip
Jonas Letzbor 2023-01-02 11:44:59 +01:00
parent 55ad1fbfee
commit 3843ebab9f
Signed by: RPJosh
GPG Key ID: 46D72F589702E55A
4 changed files with 153 additions and 100 deletions

View File

@ -7,8 +7,6 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"path/filepath"
"regexp"
"strings"
"sync"
@ -108,21 +106,8 @@ func (job *BsJob) ExecuteJob() {
}
// Make a map with path as index
destinationMap := make(map[string]ncFiles)
preCount := len("/remote.php/dav/files/" + job.ncUser.Username + "/")
for _, file := range destination.Response {
href, _ := url.QueryUnescape(file.Href)
path := href[preCount:]
var extension = filepath.Ext(path)
var name = path[0 : len(path)-len(extension)][len(job.job.DestinationDir):]
destinationMap[name] = ncFiles{
extension: extension,
path: path,
lastModified: file.GetLastModified(),
}
}
prefix := "/remote.php/dav/files/" + job.ncUser.Username + "/"
destinationMap := nextcloud.ParseSearchResult(destination, prefix, job.job.DestinationDir)
// Check for cache
job.cache()
@ -163,7 +148,7 @@ func (job *BsJob) ExecuteJob() {
// check if it has to be converted again (updated) or for the first time
des, exists := destinationMap[i]
if (!exists || b.lastModified.After(des.lastModified)) && !b.ignore {
if (!exists || b.lastModified.After(des.LastModified)) && !b.ignore {
wg.Add(1)
convertCount++
go func(book book, path string) {
@ -187,7 +172,7 @@ func (job *BsJob) ExecuteJob() {
// check if it has to be converted again (updated) or for the first time
des, exists := destinationMap[b.Name]
if !b.converted && !b.ignore && (!exists || b.lastModified.After(des.lastModified)) {
if !b.converted && !b.ignore && (!exists || b.lastModified.After(des.LastModified)) {
wg.Add(1)
convertCount++
go func(book book, path string) {
@ -202,7 +187,7 @@ func (job *BsJob) ExecuteJob() {
// Delete the files which are not available anymore
for _, dest := range destinationMap {
err := nextcloud.DeleteFile(job.ncUser, dest.path)
err := nextcloud.DeleteFile(job.ncUser, dest.Path)
if err != nil {
logger.Error(utils.FirstCharToUppercase(err.Error()))
}

View File

@ -2,10 +2,11 @@ package ncworker
import (
"fmt"
"io"
"net/http"
"net/url"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"rpjosh.de/ncDocConverter/internal/models"
@ -19,13 +20,9 @@ type convertJob struct {
ncUser *models.NextcloudUser
}
type ncFiles struct {
extension string
path string
lastModified time.Time
contentType string
size int
fileid int
// convertQueu is one pending conversion: a source file together with the
// path the converted result should be written to.
type convertQueu struct {
// The file to convert; its Path and Fileid are handed to convertFile.
source nextcloud.NcFile
// Target path of the converted file. Either the path of an already
// existing destination file or the value returned by getDestinationDir.
destination string
}
func NewNcJob(job *models.NcConvertJob, ncUser *models.NextcloudUser) *convertJob {
@ -40,7 +37,7 @@ func NewNcJob(job *models.NcConvertJob, ncUser *models.NextcloudUser) *convertJo
func (job *convertJob) ExecuteJob() {
// Get existing directory contents
source, err := nextcloud.SearchInDirectory(
sourceFolder, err := nextcloud.SearchInDirectory(
job.ncUser,
job.job.SourceDir,
[]string{
@ -53,7 +50,7 @@ func (job *convertJob) ExecuteJob() {
return
}
destination, err := nextcloud.SearchInDirectory(
destinationFolder, err := nextcloud.SearchInDirectory(
job.ncUser,
job.job.DestinationDir,
[]string{
@ -65,83 +62,89 @@ func (job *convertJob) ExecuteJob() {
return
}
preCount := len("/remote.php/dav/files/" + job.ncUser.Username + "/")
// Store the files in a map
sourceMap := make(map[string]ncFiles)
destinationMap := make(map[string]ncFiles)
// Store all files in a map
prefix := "/remote.php/dav/files/" + job.ncUser.Username + "/"
sourceMap := nextcloud.ParseSearchResult(sourceFolder, prefix, job.job.SourceDir)
destinationMap := nextcloud.ParseSearchResult(destinationFolder, prefix, job.job.DestinationDir)
for _, file := range source.Response {
href, _ := url.QueryUnescape(file.Href)
path := href[preCount:]
var extension = filepath.Ext(path)
var name = path[0 : len(path)-len(extension)][len(job.job.SourceDir):]
time := file.GetLastModified()
size, err := strconv.Atoi(file.Propstat.Prop.Size)
if err != nil {
logger.Error("%s", err)
}
sourceMap[name] = ncFiles{
extension: extension,
path: path,
lastModified: time,
size: size,
contentType: file.Propstat.Prop.Getcontenttype,
fileid: file.Propstat.Prop.Fileid,
}
}
// check which files should be converted
var filesToConvert []convertQueu
var directorys []string
for _, file := range destination.Response {
href, _ := url.QueryUnescape(file.Href)
path := href[preCount:]
var extension = filepath.Ext(path)
var name = path[0 : len(path)-len(extension)][len(job.job.DestinationDir):]
time, err := time.Parse("Mon, 02 Jan 2006 15:04:05 GMT", file.Propstat.Prop.Getlastmodified)
if err != nil {
logger.Error("%s", err)
}
size, err := strconv.Atoi(file.Propstat.Prop.Size)
if err != nil {
logger.Error("%s", err)
}
destinationMap[name] = ncFiles{
extension: extension,
path: path,
lastModified: time,
size: size,
contentType: file.Propstat.Prop.Getcontenttype,
fileid: file.Propstat.Prop.Fileid,
}
}
convertCount := 0
for index, source := range sourceMap {
// check if the file exists in the destination map
// Check if the file exists in the destination map
if dest, exists := destinationMap[index]; exists {
// compare timestamp and size
if dest.lastModified.Before(source.lastModified) {
job.convertFile(source.path, source.fileid, dest.path)
convertCount++
// Compare timestamp and size
if dest.LastModified.Before(source.LastModified) {
filesToConvert = append(filesToConvert, convertQueu{source: source, destination: dest.Path})
}
delete(destinationMap, index)
} else {
job.convertFile(
source.path, source.fileid, job.getDestinationDir(source.path),
)
convertCount++
// the directory could not be existing -> check for existance
destinationDir := job.getDestinationDir(source.Path)
appendIfNotExists(&directorys, destinationDir[0:strings.LastIndex(destinationDir, "/")+1])
filesToConvert = append(filesToConvert, convertQueu{source: source, destination: destinationDir})
delete(destinationMap, index)
}
}
var wg sync.WaitGroup
// Delete the files which are not available anymore
wg.Add(len(destinationMap))
for _, dest := range destinationMap {
err := nextcloud.DeleteFile(job.ncUser, dest.path)
go func(file *nextcloud.NcFile) {
err := nextcloud.DeleteFile(job.ncUser, dest.Path)
if err != nil {
logger.Error(utils.FirstCharToUppercase(err.Error()))
}
wg.Done()
}(&dest)
}
wg.Wait()
// Create required directorys
wg.Add(len(directorys))
for _, dest := range directorys {
go func(path string) {
nextcloud.CreateFoldersRecursively(job.ncUser, path)
wg.Done()
}(dest)
}
wg.Wait()
// Convert the files
wg.Add(len(filesToConvert))
for _, file := range filesToConvert {
logger.Info("Path: %s", file.source.Path)
go func(cvt convertQueu) {
job.convertFile(cvt.source.Path, cvt.source.Fileid, cvt.destination)
wg.Done()
}(file)
}
wg.Wait()
logger.Info("Finished Nextcloud job \"%s\": %d documents converted", job.job.JobName, len(filesToConvert))
}
logger.Info("Finished Nextcloud job \"%s\": %d documents converted", job.job.JobName, convertCount)
// appendIfNotExists records directory in dirs unless an existing entry
// already covers it.
//
// Entries are compared as plain string prefixes of each other:
//   - if an existing entry starts with directory (the same path or a deeper
//     one), dirs is left unchanged;
//   - if directory starts with an existing entry, that entry is replaced by
//     the deeper directory;
//   - otherwise directory is appended.
//
// Because the comparison is a plain prefix test, entries should end with a
// path separator so that "a/b/" is not mistaken for a parent of "a/bc/".
func appendIfNotExists(dirs *[]string, directory string) {
	for i, currentDir := range *dirs {
		// An existing entry is identical to, or nested below, the new
		// directory: nothing to add.
		if strings.HasPrefix(currentDir, directory) {
			return
		}
		// The new directory is nested below an existing entry: keep only the
		// deeper path. Returning here is what prevents the duplicate append.
		if strings.HasPrefix(directory, currentDir) {
			(*dirs)[i] = directory
			return
		}
	}

	*dirs = append(*dirs, directory)
}
func (job *convertJob) getDestinationDir(sourceFile string) string {
@ -154,9 +157,7 @@ func (job *convertJob) getDestinationDir(sourceFile string) string {
// Converts the source file to the destination file utilizing the onlyoffice convert api
func (job *convertJob) convertFile(sourceFile string, sourceid int, destinationFile string) {
logger.Debug("Trying to convert %s (%d) to %s", sourceFile, sourceid, destinationFile)
nextcloud.CreateFoldersRecursively(job.ncUser, destinationFile)
logger.Debug("Converting %s (%d) to %s", sourceFile, sourceid, destinationFile)
client := http.Client{Timeout: 10 * time.Second}
req, err := http.NewRequest(http.MethodGet, job.ncUser.NextcloudBaseUrl+"/apps/onlyoffice/downloadas", nil)
@ -172,11 +173,17 @@ func (job *convertJob) convertFile(sourceFile string, sourceid int, destinationF
res, err := client.Do(req)
if err != nil {
logger.Error("%s", err)
logger.Error("Failed to access the convert api: %s", err)
return
}
// Status Code 200
defer res.Body.Close()
if res.StatusCode != 200 {
body, _ := io.ReadAll(res.Body)
logger.Error("Failed to access the convert api (#%d). Do you have OnlyOffice installed?: %s", res.StatusCode, body)
return
}
uploadClient := http.Client{Timeout: 10 * time.Second}
uploadReq, err := http.NewRequest(http.MethodPut, job.ncUser.NextcloudBaseUrl+"/remote.php/dav/files/"+job.ncUser.Username+"/"+destinationFile, res.Body)

View File

@ -6,6 +6,9 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"path/filepath"
"strconv"
"strings"
"text/template"
"time"
@ -15,6 +18,22 @@ import (
"rpjosh.de/ncDocConverter/web"
)
// NcFile is the internal representation of a single file found on the
// Nextcloud server. ParseSearchResult fills it from one search result entry.
type NcFile struct {
// File extension as returned by filepath.Ext for Path, i.e. including the
// leading dot (".txt"). Empty if the path has no extension.
Extension string
// Path of the file as taken from the entry's unescaped href with the
// WebDAV prefix removed, e.g. "folder/file.txt".
Path string
// Last modification time, taken from the entry's GetLastModified.
LastModified time.Time
// Content type taken from the entry's Getcontenttype property.
ContentType string
// Size in bytes
Size int
// The unique file ID of the nextcloud server
Fileid int
// The href of the entry exactly as received (not unescaped), kept for
// file reference
WebdavURL string
}
type searchTemplateData struct {
Username string
Directory string
@ -104,6 +123,13 @@ func SearchInDirectory(ncUser *models.NextcloudUser, directory string, contentTy
return nil, err
}
// Create folder if not existing
if res.StatusCode == 404 {
logger.Info("Creating directory '%s' because it does not exist", "/"+directory)
CreateFoldersRecursively(ncUser, "/"+directory+"notExisting.pdf")
return &searchResult{}, nil
}
if res.StatusCode != 207 {
return nil, fmt.Errorf("status code %d: %s", res.StatusCode, resBody)
}
@ -116,6 +142,40 @@ func SearchInDirectory(ncUser *models.NextcloudUser, directory string, contentTy
return &result, nil
}
// ParseSearchResult converts the entries of a search result into NcFile values.
//
// The returned map is keyed by each file's path relative to sourceDir with
// the file extension removed ("someFolder/file" for
// "<sourceDir>someFolder/file.txt"). The value is the matching NcFile.
//
// prefix is the leading part of every href that precedes the user's files
// (callers pass "/remote.php/dav/files/<username>/"). It is removed to get
// NcFile.Path. sourceDir is the searched directory and is removed from the
// front of the map key only.
//
// Entries that cannot be parsed are logged and skipped instead of aborting
// the whole result. This covers an href that cannot be unescaped, an href
// shorter than prefix, a path shorter than sourceDir and a size that is not
// an integer.
func ParseSearchResult(result *searchResult, prefix string, sourceDir string) map[string]NcFile {
	rtc := make(map[string]NcFile)
	if result == nil {
		return rtc
	}

	preCount := len(prefix)
	for _, file := range result.Response {
		// The href is a URL path, so it must be decoded as one: QueryUnescape
		// would additionally turn a literal "+" into a space.
		href, err := url.PathUnescape(file.Href)
		if err != nil {
			logger.Error("Failed to unescape the href '%s': %s", file.Href, err)
			continue
		}
		// Guard the slice below; a short href would otherwise panic.
		if len(href) < preCount {
			logger.Error("Unexpected href '%s': shorter than the prefix '%s'", href, prefix)
			continue
		}

		path := href[preCount:]
		extension := filepath.Ext(path)
		withoutExt := strings.TrimSuffix(path, extension)
		if len(withoutExt) < len(sourceDir) {
			logger.Error("Unexpected path '%s': shorter than the directory '%s'", path, sourceDir)
			continue
		}
		name := withoutExt[len(sourceDir):]

		size, err := strconv.Atoi(file.Propstat.Prop.Size)
		if err != nil {
			logger.Error("Failed to parse the file size '%s' to an integer: %s", file.Propstat.Prop.Size, err)
			continue
		}

		rtc[name] = NcFile{
			Extension:    extension,
			Path:         path,
			LastModified: file.GetLastModified(),
			Size:         size,
			ContentType:  file.Propstat.Prop.Getcontenttype,
			Fileid:       file.Propstat.Prop.Fileid,
			WebdavURL:    file.Href,
		}
	}

	return rtc
}
// Deletes the file with the given path.
// The path has to start at the root level: Ebook/myFolder/file.txt
func DeleteFile(ncUser *models.NextcloudUser, filePath string) error {
@ -154,7 +214,7 @@ func CreateFoldersRecursively(ncUser *models.NextcloudUser, destinationFile stri
}
if res.StatusCode != 201 && res.StatusCode != 405 {
logger.Error("Failed to create directorys")
logger.Error("Failed to create directory '%s'", folderTree)
}
}
}

View File

@ -10,5 +10,6 @@ if "%~1"=="-FIXED_CTRL_C" (
GOTO :EOF
)
SET PATH=%PATH%;C:\Windows\System32
set GOTMPDIR=C:\MYCOMP
nodemon --delay 1s -e go,html --ignore web/app/ --exec go run ./cmd/ncDocConverth --signal SIGTERM
nodemon --delay 1s -e go,html --ignore web/app/ --signal SIGKILL --exec go run ./cmd/ncDocConverth || exit 1