From bfc1ad40a5c08f53104e71b48ee018c41200daae Mon Sep 17 00:00:00 2001 From: RPJosh Date: Fri, 23 Sep 2022 11:58:53 +0200 Subject: [PATCH] Implement basic conversion support (doc + docx -> pdf) --- .gitignore | 1 + cmd/ncDocConverth/main.go | 11 + go.mod | 1 + internal/models/ncconvert.go | 9 +- internal/models/webconfig.go | 4 +- internal/ncworker/convertscheduler.go | 30 +++ internal/ncworker/ncconverter.go | 310 +++++++++++++++++++++++++- internal/ncworker/scheduler.go | 7 - web/apitemplate/ncsearch.tmpl.xml | 40 ++++ web/efs.go | 5 +- 10 files changed, 398 insertions(+), 20 deletions(-) create mode 100644 internal/ncworker/convertscheduler.go delete mode 100644 internal/ncworker/scheduler.go create mode 100644 web/apitemplate/ncsearch.tmpl.xml diff --git a/.gitignore b/.gitignore index ff841af..356e6f7 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ go.work # Locally used configuration file /config.yaml +/ncConverter.json # Vite build file /.vite diff --git a/cmd/ncDocConverth/main.go b/cmd/ncDocConverth/main.go index c29d399..0f4801f 100644 --- a/cmd/ncDocConverth/main.go +++ b/cmd/ncDocConverth/main.go @@ -6,6 +6,7 @@ import ( "time" "rpjosh.de/ncDocConverter/internal/models" + "rpjosh.de/ncDocConverter/internal/ncworker" "rpjosh.de/ncDocConverter/pkg/logger" ) @@ -50,6 +51,16 @@ func main() { WriteTimeout: 10 * time.Second, } + ncConvertUsers, err := models.ParseConvertUsers("./ncConverter.json") + if err != nil { + logger.Error("Unable to parse the file %s: %s", "dd", err) + } + ncworker.NewScheduler(ncConvertUsers) + + if 1 == 1 { + return + } + logger.Info("Server started on %s", config.Server.Address) var errw error if config.Server.Certificate == "" { diff --git a/go.mod b/go.mod index edbaa26..d55a4ef 100644 --- a/go.mod +++ b/go.mod @@ -11,3 +11,4 @@ require ( ) // https://zhwt.github.io/yaml-to-go/ +// https://www.onlinetool.io/xmltogo/ \ No newline at end of file diff --git a/internal/models/ncconvert.go b/internal/models/ncconvert.go index dc7fad2..ee0d19b 100644 --- a/internal/models/ncconvert.go +++ b/internal/models/ncconvert.go @@ -12,6 +12,7 @@ type User struct { NextcloudBaseUrl string`json:"nextcloudUrl"` Username string`json:"username"` Password string`json:"password"` + ConvertJobs []ConvertJob`json:"jobs"` } type ConvertJob struct { @@ -23,14 +24,14 @@ type ConvertJob struct { Executions []string`json:"execution"` } -type NcConverter struct { +type NcConvertUsers struct { Users []User`json:"users"` } // Parses the given file to the in memory struct -func ParseUsers(filePath string) (*NcConverter, error) { +func ParseConvertUsers(filePath string) (*NcConvertUsers, error) { - file, err := os.OpenFile(filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + file, err := os.OpenFile(filePath, os.O_APPEND|os.O_CREATE, 0644) if err != nil { return nil, fmt.Errorf("failed to open the file '%s': %s", filePath, err) } @@ -41,7 +42,7 @@ func ParseUsers(filePath string) (*NcConverter, error) { return nil, fmt.Errorf("failed to parse 'ncConverter.json': %s", err) } - var conv NcConverter + var conv NcConvertUsers json.Unmarshal(byteValue, &conv) diff --git a/internal/models/webconfig.go b/internal/models/webconfig.go index 5c453cb..0d66c51 100644 --- a/internal/models/webconfig.go +++ b/internal/models/webconfig.go @@ -27,7 +27,7 @@ type Logging struct { } // Parses the given configuration file (.yaml file) to an WebConfiguration -func ParseConfig(webConfig *WebConfig, file string) (*WebConfig, error) { +func ParseWebConfig(webConfig *WebConfig, file string) (*WebConfig, error) { if file == "" { return webConfig, nil } @@ -68,7 +68,7 @@ func SetConfig() (*WebConfig, error) { } } webConfig := getDefaultConfig() - webConfig, err := ParseConfig(webConfig, configPath) + webConfig, err := ParseWebConfig(webConfig, configPath) if err != nil { logger.Error("Unable to parse the configuration file '%s': %s", configPath, err) webConfig = getDefaultConfig() diff --git a/internal/ncworker/convertscheduler.go b/internal/ncworker/convertscheduler.go new file mode 100644 index 0000000..7039eb0 --- /dev/null +++ b/internal/ncworker/convertscheduler.go @@ -0,0 +1,30 @@ +package ncworker + +import ( + + "rpjosh.de/ncDocConverter/internal/models" +) + +type NcConvertScheduler struct { + users *models.NcConvertUsers +} + +func NewScheduler(users *models.NcConvertUsers) *NcConvertScheduler { + scheduler := NcConvertScheduler { + users: users, + } + + scheduler.ScheduleExecutions() + + return &scheduler +} + +func (scheduler NcConvertScheduler) ScheduleExecutions() { + for _, user := range scheduler.users.Users { + for _, job := range user.ConvertJobs { + convJob := NewJob(&job, &user) + convJob.ExecuteJob() + } + + } +} \ No newline at end of file diff --git a/internal/ncworker/ncconverter.go b/internal/ncworker/ncconverter.go index 8c1e9fb..4f0d529 100644 --- a/internal/ncworker/ncconverter.go +++ b/internal/ncworker/ncconverter.go @@ -1,9 +1,307 @@ package ncworker -type NcConverter struct { - NextcloudBaseUrl string`json:"nextcloudUrl"` - Username string`json:"username"` - App - SourceDir string`json:"users"` - DestinationDir string`json:"users"` +import ( + "bytes" + "encoding/xml" + "fmt" + "io" + "net/http" + "path/filepath" + "strconv" + "strings" + "text/template" + "time" + + "rpjosh.de/ncDocConverter/internal/models" + "rpjosh.de/ncDocConverter/pkg/logger" + "rpjosh.de/ncDocConverter/web" +) + +type convertJob struct { + job *models.ConvertJob + user *models.User +} + +type searchResult struct { + XMLName xml.Name `xml:"multistatus"` + Text string `xml:",chardata"` + D string `xml:"d,attr"` + S string `xml:"s,attr"` + Oc string `xml:"oc,attr"` + Nc string `xml:"nc,attr"` + Response []struct { + Text string `xml:",chardata"` + Href string `xml:"href"` + Propstat struct { + Text string `xml:",chardata"` + Prop struct { + Text string `xml:",chardata"` + Getcontenttype string `xml:"getcontenttype"` + Getlastmodified string `xml:"getlastmodified"` + Size string `xml:"size"` + Fileid int `xml:"fileid"` + } `xml:"prop"` + Status string `xml:"status"` + } `xml:"propstat"` + } `xml:"response"` +} + +type ncFiles struct { + extension string + path string + lastModified time.Time + contentType string + size int + fileid int +} + +type searchTemplateData struct { + Username string + Directory string + ContentType []string +} + +func NewJob(job *models.ConvertJob, user *models.User) *convertJob { + convJob := &convertJob{ + job: job, + user: user, + + } + + return convJob +} + +func (job *convertJob) ExecuteJob() { + source := job.searchInDirectory( + job.job.SourceDir, + []string { + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/msword", + }, + ) + destination := job.searchInDirectory( + job.job.DestinationDir, + []string { + "application/pdf", + }, + ) + + + preCount := len("/remote.php/dav/files/" + job.user.Username + "/") + // store the files in a map + sourceMap := make(map[string]ncFiles) + destinationMap := make(map[string]ncFiles) + + for _, file := range source.Response { + path := file.Href[preCount:] + var extension = filepath.Ext(path) + var name = path[0:len(path)-len(extension)][len(job.job.SourceDir):] + // Time format: Fri, 23 Sep 2022 05:46:31 GMT + time, err := time.Parse("Mon, 02 Jan 2006 15:04:05 GMT", file.Propstat.Prop.Getlastmodified) + if err != nil { + logger.Error("%s", err) + } + size, err := strconv.Atoi(file.Propstat.Prop.Size) + if err != nil { + logger.Error("%s", err) + } + sourceMap[name] = ncFiles{ + extension: extension, + path: path, + lastModified: time, + size: size, + contentType: file.Propstat.Prop.Getcontenttype, + fileid: file.Propstat.Prop.Fileid, + } + } + + for _, file := range destination.Response { + path := file.Href[preCount:] + var extension = filepath.Ext(path) + var name = path[0:len(path)-len(extension)][len(job.job.DestinationDir):] + + time, err := time.Parse("Mon, 02 Jan 2006 15:04:05 GMT", file.Propstat.Prop.Getlastmodified) + if err != nil { + logger.Error("%s", err) + } + size, err := strconv.Atoi(file.Propstat.Prop.Size) + if err != nil { + logger.Error("%s", err) + } + destinationMap[name] = ncFiles{ + extension: extension, + path: path, + lastModified: time, + size: size, + contentType: file.Propstat.Prop.Getcontenttype, + fileid: file.Propstat.Prop.Fileid, + } + } + + for index, source := range sourceMap { + // check if the file exists in the destination map + if dest, exists := destinationMap[index]; exists { + // compare timestamp and size + if dest.lastModified.Before(source.lastModified) { + job.convertFile(source.path, source.fileid, dest.path) + } + delete(destinationMap, index) + } else { + job.convertFile( + source.path, source.fileid, job.getDestinationDir(source.path), + ) + delete(destinationMap, index) + } + } + + // delete the files which are not available anymore + for _, dest := range destinationMap { + job.deleteFile(dest.path) + } +} + +func (job *convertJob) getDestinationDir(sourceFile string) string { + sourceFile = sourceFile[len(job.job.SourceDir):] + var extension = filepath.Ext(sourceFile) + var name = sourceFile[0:len(sourceFile)-len(extension)] + + return job.job.DestinationDir + name + ".pdf" +} + +func (job *convertJob) createFoldersRecursively(destinationFile string) { + s := strings.Split(destinationFile, "/") + folderTree := "" + + logger.Debug("Creating directory for file '%s'", destinationFile) + + // webdav doesn't have an function to create directories recursively + for _, folder := range s[:len(s) - 1] { + folderTree += folder + "/" + + client := http.Client{Timeout: 5 * time.Second} + req, err := http.NewRequest("MKCOL", job.user.NextcloudBaseUrl + "/remote.php/dav/files/" + job.user.Username + "/" + folderTree, nil) + if err != nil { + logger.Error("%s", err) + } + req.SetBasicAuth(job.user.Username, job.user.Password) + + res, err := client.Do(req) + if err != nil { + logger.Error("%s", err) + } + if (res.StatusCode != 201 && res.StatusCode != 405) { + + } + // status code 201 or 405 (already existing) + } +} + +func (job *convertJob) convertFile(sourceFile string, sourceid int, destinationFile string) { + logger.Debug("Trying to convert %s (%d) to %s", sourceFile, sourceid, destinationFile) + + job.createFoldersRecursively(destinationFile) + + client := http.Client{Timeout: 10 * time.Second} + req, err := http.NewRequest(http.MethodGet, job.user.NextcloudBaseUrl + "/apps/onlyoffice/downloadas", nil) + if err != nil { + logger.Error("%s", err) + } + req.SetBasicAuth(job.user.Username, job.user.Password) + + q := req.URL.Query() + q.Add("fileId", fmt.Sprint(sourceid)) + q.Add("toExtension", "pdf") + req.URL.RawQuery = q.Encode() + + res, err := client.Do(req) + if err != nil { + logger.Error("%s", err) + } + // Status Code 200 + defer res.Body.Close() + + uploadClient := http.Client{Timeout: 10 * time.Second} + uploadReq, err := http.NewRequest(http.MethodPut, job.user.NextcloudBaseUrl + "/remote.php/dav/files/" + job.user.Username + "/" + destinationFile, res.Body) + + if err != nil { + logger.Error("%s", err) + } + uploadReq.SetBasicAuth(job.user.Username, job.user.Password) + uploadReq.Header.Set("Content-Type", "application/binary") + + res, err = uploadClient.Do(uploadReq) + if err != nil { + logger.Error("%s", err) + } + + if (res.StatusCode != 204 && res.StatusCode != 201) { + logger.Error("Failed to create file %s (#%d)", destinationFile, res.StatusCode) + } + // Status Code 201 + res.Body.Close() +} + +func (job *convertJob) deleteFile(filePath string) { + client := http.Client{Timeout: 5 * time.Second} + + req, err := http.NewRequest(http.MethodDelete, job.user.NextcloudBaseUrl + "/remote.php/dav/files/" + job.user.Username + "/" + filePath, nil) + if err != nil { + logger.Error("%s", err) + } + req.SetBasicAuth(job.user.Username, job.user.Password) + + res, err := client.Do(req) + if err != nil { + logger.Error("%s", err) + } + + if (res.StatusCode != 204) { + logger.Error("Failed to delete file %s (%d)", filePath, res.StatusCode) + } +} + +// Searches all doc files in the source directory +func (job *convertJob) searchInDirectory(directory string, contentType []string) *searchResult { + client := http.Client{Timeout: 5 * time.Second} + + template, err := template.ParseFS(web.ApiTemplateFiles, "apitemplate/ncsearch.tmpl.xml") + if err != nil { + logger.Error("%s", err) + } + var buf bytes.Buffer + templateData := searchTemplateData{ + Username: job.user.Username, + Directory: directory, + ContentType: contentType, + } + if err = template.Execute(&buf, templateData); err != nil { + logger.Error("%s", err) + } + // Status code 207 + req, err := http.NewRequest("SEARCH", job.user.NextcloudBaseUrl + "/remote.php/dav/", &buf) + if err != nil { + logger.Error("%s", err) + } + req.SetBasicAuth(job.user.Username, job.user.Password) + req.Header.Set("Content-Type", "application/xml") + + res, err := client.Do(req) + if err != nil { + logger.Error("%s", err) + } + + defer res.Body.Close() + + resBody, err := io.ReadAll(res.Body) + if err != nil { + logger.Error("%s", err) + } + + fmt.Print(res.StatusCode) + var result searchResult + if err = xml.Unmarshal(resBody, &result); err != nil { + logger.Error("%s", err) + } + + return &result } \ No newline at end of file diff --git a/internal/ncworker/scheduler.go b/internal/ncworker/scheduler.go deleted file mode 100644 index 00451f2..0000000 --- a/internal/ncworker/scheduler.go +++ /dev/null @@ -1,7 +0,0 @@ -package ncworker - - - -func scheduleExecutions() { - -} \ No newline at end of file diff --git a/web/apitemplate/ncsearch.tmpl.xml b/web/apitemplate/ncsearch.tmpl.xml new file mode 100644 index 0000000..d94b520 --- /dev/null +++ b/web/apitemplate/ncsearch.tmpl.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + /files/{{.Username}}/{{.Directory}} + + + + + + + {{range .ContentType}} + + + + + {{ . }} + + {{end}} + + + + + + 100 + + + + + + \ No newline at end of file diff --git a/web/efs.go b/web/efs.go index 2cd862f..48565fb 100644 --- a/web/efs.go +++ b/web/efs.go @@ -11,4 +11,7 @@ var FrontendFiles embed.FS var DevelopeFiles embed.FS //go:embed "template" -var TemplateFiles embed.FS \ No newline at end of file +var TemplateFiles embed.FS + +//go:embed "apitemplate" +var ApiTemplateFiles embed.FS \ No newline at end of file