From 12e6d9da17eef680aaf12dd47a474bbe13c6b634 Mon Sep 17 00:00:00 2001 From: igor Date: Thu, 5 Feb 2026 10:40:25 +0100 Subject: [PATCH] pridany zdrojovy kod, plne generovane AI Codex (OpenAI) --- go.mod | 23 ++ go.sum | 27 +++ main.go | 690 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 740 insertions(+) create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..5666aed --- /dev/null +++ b/go.mod @@ -0,0 +1,23 @@ +module pdftrim + +go 1.23.0 + +toolchain go1.23.2 + +require ( + github.com/pdfcpu/pdfcpu v0.10.2 + rsc.io/pdf v0.1.1 +) + +require ( + github.com/hhrutter/lzw v1.0.0 // indirect + github.com/hhrutter/pkcs7 v0.2.0 // indirect + github.com/hhrutter/tiff v1.0.2 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/rivo/uniseg v0.4.7 // indirect + golang.org/x/crypto v0.37.0 // indirect + golang.org/x/image v0.26.0 // indirect + golang.org/x/text v0.24.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..87ce602 --- /dev/null +++ b/go.sum @@ -0,0 +1,27 @@ +github.com/hhrutter/lzw v1.0.0 h1:laL89Llp86W3rRs83LvKbwYRx6INE8gDn0XNb1oXtm0= +github.com/hhrutter/lzw v1.0.0/go.mod h1:2HC6DJSn/n6iAZfgM3Pg+cP1KxeWc3ezG8bBqW5+WEo= +github.com/hhrutter/pkcs7 v0.2.0 h1:i4HN2XMbGQpZRnKBLsUwO3dSckzgX142TNqY/KfXg+I= +github.com/hhrutter/pkcs7 v0.2.0/go.mod h1:aEzKz0+ZAlz7YaEMY47jDHL14hVWD6iXt0AgqgAvWgE= +github.com/hhrutter/tiff v1.0.2 h1:7H3FQQpKu/i5WaSChoD1nnJbGx4MxU5TlNqqpxw55z8= +github.com/hhrutter/tiff v1.0.2/go.mod h1:pcOeuK5loFUE7Y/WnzGw20YxUdnqjY1P0Jlcieb/cCw= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/pdfcpu/pdfcpu v0.10.2 h1:DB2dWuoq0eF0QwHjgyLirYKLTCzFOoZdmmIUSu72aL0= +github.com/pdfcpu/pdfcpu v0.10.2/go.mod h1:Q2Z3sqdRqHTdIq1mPAUl8nfAoim8p3c1ASOaQ10mCpE= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= +golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= +golang.org/x/image v0.26.0 h1:4XjIFEZWQmCZi6Wv8BoxsDhRU3RVnLX04dToTDAEPlY= +golang.org/x/image v0.26.0/go.mod h1:lcxbMFAovzpnJxzXS3nyL83K27tmqtKzIJpctK8YO5c= +golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= +golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +rsc.io/pdf v0.1.1 h1:k1MczvYDUvJBe93bYd7wrZLLUEcLZAuF824/I4e5Xr4= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/main.go b/main.go new file mode 100644 index 0000000..5cde728 --- /dev/null +++ b/main.go @@ -0,0 +1,690 @@ +package main + +import ( + "errors" + "fmt" + "math" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/pdfcpu/pdfcpu/pkg/api" + "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" + "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types" + "rsc.io/pdf" +) + +const ( + paddingPoints = 6.0 + minSidePoints = 1.0 +) + +type rect struct { + llx float64 + lly float64 + urx float64 + ury float64 +} + +type pageCrop struct { + crop rect + apply bool +} + +type matrix [3][3]float64 + +var identity = matrix{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}} + +func main() { + if len(os.Args) < 2 { + fmt.Fprintf(os.Stderr, "Pouzitie: %s subor.pdf\n", filepath.Base(os.Args[0])) + os.Exit(1) + } + + input := os.Args[1] + if strings.ToLower(filepath.Ext(input)) != ".pdf" { + fmt.Fprintln(os.Stderr, "Chyba: prvy argument musi byt PDF subor.") + os.Exit(1) + } + + if _, err := os.Stat(input); err != nil { + fmt.Fprintf(os.Stderr, "Chyba: subor %q neexistuje alebo sa neda citat (%v).\n", input, err) + os.Exit(1) + } + + output := trimmedPath(input) + if err := trimPDF(input, output); err != nil { + fmt.Fprintf(os.Stderr, "Chyba: %v\n", err) + os.Exit(1) + } + + fmt.Printf("Hotovo: %s\n", output) +} + +func trimPDF(input, output string) error { + reader, err := pdf.Open(input) + if err != nil { + return fmt.Errorf("nepodarilo sa otvorit PDF: %w", err) + } + + pageCount := reader.NumPage() + if pageCount == 0 { + return errors.New("PDF nema ziadne strany") + } + + plan := make([]pageCrop, pageCount) + for i := 1; i <= pageCount; i++ { + p := reader.Page(i) + if p.V.IsNull() { + continue + } + + pageBox, err := effectivePageBox(p.V) + if err != nil { + return fmt.Errorf("strana %d: %w", i, err) + } + + contentBox, found := detectContentBox(p) + if !found { + continue + } + + contentBox = contentBox.expand(paddingPoints).clamp(pageBox) + if contentBox.width() < minSidePoints || contentBox.height() < minSidePoints { + continue + } + + if nearlyEqualRect(contentBox, pageBox, 0.01) { + continue + } + + plan[i-1] = pageCrop{crop: contentBox, apply: true} + } + + if err := copyFile(input, output); err != nil { + return fmt.Errorf("nepodarilo sa pripravit vystupny subor: %w", err) + } + + for i, item := range plan { + if !item.apply { + continue + } + + boxDef := item.crop.boxDef() + cropBox, err := model.ParseBox(boxDef, types.POINTS) + if err != nil { + return fmt.Errorf("strana %d: neplatny crop box %s: %w", i+1, boxDef, err) + } + + pageSel := []string{strconv.Itoa(i + 1)} + if err := api.CropFile(output, output, pageSel, cropBox, nil); err != nil { + return fmt.Errorf("strana %d: crop zlyhal: %w", i+1, err) + } + + pageBounds, err := api.PageBoundaries("media:"+boxDef, types.POINTS) + if err != nil { + return fmt.Errorf("strana %d: nepodarilo sa pripravit media box: %w", i+1, err) + } + if err := api.AddBoxesFile(output, output, pageSel, pageBounds, nil); err != nil { + return fmt.Errorf("strana %d: nastavenie media box zlyhalo: %w", i+1, err) + } + } + + return nil +} + +func detectContentBox(p pdf.Page) (rect, bool) { + var box rect + found := false + + add := func(r rect) { + r = r.normalize() + if !r.valid() { + return + } + if !found { + box = r + found = true + return + } + box = box.union(r) + } + + if tb, ok := detectTextBox(p); ok { + add(tb) + } + + if gb, ok := detectGraphicsBox(p); ok { + add(gb) + } + + return box, found +} + +type textState struct { + Tc float64 + Tw float64 + Th float64 + Tl float64 + Tf pdf.Font + Tfs float64 + Trise float64 + Tm matrix + Tlm matrix + CTM matrix +} + +func detectTextBox(p pdf.Page) (rect, bool) { + var box rect + found := false + + add := func(r rect) { + r = r.normalize() + if !r.valid() { + return + } + if !found { + box = r + found = true + return + } + box = box.union(r) + } + + g := textState{ + Th: 1, + CTM: identity, + Tm: identity, + Tlm: identity, + } + + var enc pdf.TextEncoding + var gstack []textState + + showText := func(raw string) { + decoded := raw + if enc != nil { + decoded = enc.Decode(raw) + } + + rawBytes := []byte(raw) + i := 0 + + for _, ch := range decoded { + trm := matrix{{g.Tfs * g.Th, 0, 0}, {0, g.Tfs, 0}, {0, g.Trise, 1}}.mul(g.Tm).mul(g.CTM) + + w0 := 500.0 + if i < len(rawBytes) && !g.Tf.V.IsNull() { + w0 = g.Tf.Width(int(rawBytes[i])) + } + if i < len(rawBytes) { + i++ + } + + charWidth := math.Abs(w0 / 1000 * trm[0][0]) + h := math.Abs(g.Tfs) + if h <= 0 { + h = 8 + } + + if !unicodeIsSpace(ch) { + add(rect{ + llx: trm[2][0], + lly: trm[2][1] - 0.30*h, + urx: trm[2][0] + math.Max(charWidth, 0.2*h), + ury: trm[2][1] + 0.90*h, + }) + } + + tx := w0/1000*g.Tfs + g.Tc + if ch == ' ' { + tx += g.Tw + } + tx *= g.Th + + g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm) + } + } + + if !interpretPageContent(p, func(stk *pdf.Stack, op string) { + args := popArgs(stk) + + switch op { + case "q": + gstack = append(gstack, g) + case "Q": + if len(gstack) == 0 { + return + } + g = gstack[len(gstack)-1] + gstack = gstack[:len(gstack)-1] + case "cm": + m, ok := matrixFromArgs(args) + if ok { + g.CTM = m.mul(g.CTM) + } + case "BT": + g.Tm = identity + g.Tlm = identity + case "T*": + x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}} + g.Tlm = x.mul(g.Tlm) + g.Tm = g.Tlm + case "Tc": + if len(args) == 1 { + g.Tc = args[0].Float64() + } + case "TD": + if len(args) != 2 { + return + } + g.Tl = -args[1].Float64() + fallthrough + case "Td": + if len(args) != 2 { + return + } + tx := args[0].Float64() + ty := args[1].Float64() + x := matrix{{1, 0, 0}, {0, 1, 0}, {tx, ty, 1}} + g.Tlm = x.mul(g.Tlm) + g.Tm = g.Tlm + case "Tf": + if len(args) != 2 { + return + } + g.Tf = p.Font(args[0].Name()) + g.Tfs = args[1].Float64() + enc = nil + if !g.Tf.V.IsNull() { + enc = g.Tf.Encoder() + } + case "\"": + if len(args) != 3 { + return + } + g.Tw = args[0].Float64() + g.Tc = args[1].Float64() + showText(args[2].RawString()) + case "'": + if len(args) != 1 { + return + } + x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}} + g.Tlm = x.mul(g.Tlm) + g.Tm = g.Tlm + showText(args[0].RawString()) + case "Tj": + if len(args) == 1 { + showText(args[0].RawString()) + } + case "TJ": + if len(args) != 1 { + return + } + v := args[0] + for i := 0; i < v.Len(); i++ { + x := v.Index(i) + if x.Kind() == pdf.String { + showText(x.RawString()) + continue + } + tx := -x.Float64() / 1000 * g.Tfs * g.Th + g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm) + } + case "TL": + if len(args) == 1 { + g.Tl = args[0].Float64() + } + case "Tm": + m, ok := matrixFromArgs(args) + if ok { + g.Tm = m + g.Tlm = m + } + case "Ts": + if len(args) == 1 { + g.Trise = args[0].Float64() + } + case "Tw": + if len(args) == 1 { + g.Tw = args[0].Float64() + } + case "Tz": + if len(args) == 1 { + g.Th = args[0].Float64() / 100 + } + } + }) { + return rect{}, false + } + + return box, found +} + +func detectGraphicsBox(p pdf.Page) (rect, bool) { + var box rect + found := false + + add := func(r rect) { + r = r.normalize() + if !r.valid() { + return + } + if !found { + box = r + found = true + return + } + box = box.union(r) + } + + xObjects := p.Resources().Key("XObject") + + ctm := identity + var ctmStack []matrix + + if !interpretPageContent(p, func(stk *pdf.Stack, op string) { + args := popArgs(stk) + + switch op { + case "q": + ctmStack = append(ctmStack, ctm) + + case "Q": + if len(ctmStack) == 0 { + return + } + ctm = ctmStack[len(ctmStack)-1] + ctmStack = ctmStack[:len(ctmStack)-1] + + case "cm": + m, ok := matrixFromArgs(args) + if ok { + ctm = m.mul(ctm) + } + + case "re": + if len(args) != 4 { + return + } + x := args[0].Float64() + y := args[1].Float64() + w := args[2].Float64() + h := args[3].Float64() + add(transformedRect(ctm, x, y, x+w, y+h)) + + case "Do": + if len(args) != 1 { + return + } + + name := args[0].Name() + if name == "" || xObjects.IsNull() { + return + } + + xobj := xObjects.Key(name) + subtype := xobj.Key("Subtype").Name() + if subtype == "Image" || subtype == "Form" { + add(transformedRect(ctm, 0, 0, 1, 1)) + } + } + }) { + return rect{}, false + } + + return box, found +} + +func interpretPageContent(p pdf.Page, fn func(stk *pdf.Stack, op string)) bool { + contents := p.V.Key("Contents") + if contents.IsNull() { + return false + } + + ok := false + run := func(stream pdf.Value) { + if safeInterpretStream(stream, fn) { + ok = true + } + } + + switch contents.Kind() { + case pdf.Stream: + run(contents) + case pdf.Array: + for i := 0; i < contents.Len(); i++ { + stream := contents.Index(i) + if stream.IsNull() || stream.Kind() != pdf.Stream { + continue + } + run(stream) + } + } + + return ok +} + +func safeInterpretStream(stream pdf.Value, fn func(stk *pdf.Stack, op string)) (ok bool) { + if stream.IsNull() || stream.Kind() != pdf.Stream { + return false + } + + defer func() { + if recover() != nil { + ok = false + } + }() + + pdf.Interpret(stream, fn) + return true +} + +func unicodeIsSpace(r rune) bool { + if r == ' ' || r == '\t' || r == '\n' || r == '\r' { + return true + } + return false +} + +func popArgs(stk *pdf.Stack) []pdf.Value { + n := stk.Len() + if n == 0 { + return nil + } + + args := make([]pdf.Value, n) + for i := n - 1; i >= 0; i-- { + args[i] = stk.Pop() + } + + return args +} + +func matrixFromArgs(args []pdf.Value) (matrix, bool) { + if len(args) != 6 { + return matrix{}, false + } + + var m matrix + for i := 0; i < 6; i++ { + m[i/2][i%2] = args[i].Float64() + } + m[2][2] = 1 + + return m, true +} + +func transformedRect(m matrix, x0, y0, x1, y1 float64) rect { + p1x, p1y := transformPoint(m, x0, y0) + p2x, p2y := transformPoint(m, x1, y0) + p3x, p3y := transformPoint(m, x0, y1) + p4x, p4y := transformPoint(m, x1, y1) + + minX := math.Min(math.Min(p1x, p2x), math.Min(p3x, p4x)) + minY := math.Min(math.Min(p1y, p2y), math.Min(p3y, p4y)) + maxX := math.Max(math.Max(p1x, p2x), math.Max(p3x, p4x)) + maxY := math.Max(math.Max(p1y, p2y), math.Max(p3y, p4y)) + + return rect{llx: minX, lly: minY, urx: maxX, ury: maxY} +} + +func transformPoint(m matrix, x, y float64) (float64, float64) { + px := x*m[0][0] + y*m[1][0] + m[2][0] + py := x*m[0][1] + y*m[1][1] + m[2][1] + return px, py +} + +func effectivePageBox(page pdf.Value) (rect, error) { + if r, ok := inheritedRect(page, "CropBox"); ok { + return r, nil + } + if r, ok := inheritedRect(page, "MediaBox"); ok { + return r, nil + } + return rect{}, errors.New("chybajuci MediaBox/CropBox") +} + +func inheritedRect(v pdf.Value, key string) (rect, bool) { + for cur := v; !cur.IsNull(); cur = cur.Key("Parent") { + candidate := cur.Key(key) + r, err := rectFromArray(candidate) + if err == nil { + return r, true + } + } + return rect{}, false +} + +func rectFromArray(v pdf.Value) (rect, error) { + if v.IsNull() || v.Len() != 4 { + return rect{}, errors.New("neplatny rectangle") + } + + r := rect{ + llx: v.Index(0).Float64(), + lly: v.Index(1).Float64(), + urx: v.Index(2).Float64(), + ury: v.Index(3).Float64(), + }.normalize() + + if !r.valid() { + return rect{}, errors.New("neplatny rectangle") + } + + return r, nil +} + +func copyFile(src, dst string) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + + out, err := os.Create(dst) + if err != nil { + return err + } + + _, err = out.ReadFrom(in) + if closeErr := out.Close(); closeErr != nil && err == nil { + err = closeErr + } + + if err != nil { + _ = os.Remove(dst) + } + return err +} + +func trimmedPath(input string) string { + ext := filepath.Ext(input) + base := strings.TrimSuffix(input, ext) + return base + "-trimmed.pdf" +} + +func nearlyEqualRect(a, b rect, eps float64) bool { + return math.Abs(a.llx-b.llx) <= eps && + math.Abs(a.lly-b.lly) <= eps && + math.Abs(a.urx-b.urx) <= eps && + math.Abs(a.ury-b.ury) <= eps +} + +func (m matrix) mul(other matrix) matrix { + var out matrix + for i := 0; i < 3; i++ { + for j := 0; j < 3; j++ { + for k := 0; k < 3; k++ { + out[i][j] += m[i][k] * other[k][j] + } + } + } + return out +} + +func (r rect) normalize() rect { + if r.llx > r.urx { + r.llx, r.urx = r.urx, r.llx + } + if r.lly > r.ury { + r.lly, r.ury = r.ury, r.lly + } + return r +} + +func (r rect) valid() bool { + return !(math.IsNaN(r.llx) || math.IsNaN(r.lly) || math.IsNaN(r.urx) || math.IsNaN(r.ury) || + math.IsInf(r.llx, 0) || math.IsInf(r.lly, 0) || math.IsInf(r.urx, 0) || math.IsInf(r.ury, 0)) +} + +func (r rect) width() float64 { + return r.urx - r.llx +} + +func (r rect) height() float64 { + return r.ury - r.lly +} + +func (r rect) union(other rect) rect { + return rect{ + llx: math.Min(r.llx, other.llx), + lly: math.Min(r.lly, other.lly), + urx: math.Max(r.urx, other.urx), + ury: math.Max(r.ury, other.ury), + } +} + +func (r rect) expand(padding float64) rect { + return rect{ + llx: r.llx - padding, + lly: r.lly - padding, + urx: r.urx + padding, + ury: r.ury + padding, + } +} + +func (r rect) clamp(limit rect) rect { + return rect{ + llx: clamp(r.llx, limit.llx, limit.urx), + lly: clamp(r.lly, limit.lly, limit.ury), + urx: clamp(r.urx, limit.llx, limit.urx), + ury: clamp(r.ury, limit.lly, limit.ury), + } +} + +func (r rect) boxDef() string { + return fmt.Sprintf("[%.4f %.4f %.4f %.4f]", r.llx, r.lly, r.urx, r.ury) +} + +func clamp(v, minV, maxV float64) float64 { + if v < minV { + return minV + } + if v > maxV { + return maxV + } + return v +}