Browse Source

Support adding timestamps to imported rows

* Add `--timestamp` flag to enable adding timestamps.
* Add `timestamp` column to table schema.
* Efficiently add RFC3339-based timestamp (used by SQLite) to rows.
* Fix minor bug caused by inconsistent use of csvPath variable.
Jonathan Storm 3 months ago
parent
commit
d9d5ca9738
1 changed file with 65 additions and 17 deletions
  1. 65 17
      main.go

+ 65 - 17
main.go

@@ -10,6 +10,7 @@ import (
 	"database/sql"
 	"encoding/csv"
 	"errors"
+	"flag"
 	"fmt"
 	"io"
 	"os"
@@ -28,27 +29,40 @@ import (
 
 var (
 	Version, Build string
+	WithTimestamp  bool
 )
 
-const MaxRecords = 1_000_000_000
-
-const SQLITE_CONSTRAINT_PRIMARYKEY = 1555
+const (
+	MaxRecords                   = 1_000_000_000
+	SQLITE_CONSTRAINT_PRIMARYKEY = 1555
+)
 
 func main() {
 	log := logger.NewLogger()
 
-	if len(os.Args) != 2 {
+	flag.BoolVar(
+		&WithTimestamp,
+		"timestamp",
+		false,
+		"Add a column 'timestamp' to imported rows. "+
+			"All rows receive the same value for 'timestamp', "+
+			"calculated at the start of the import. "+
+			"",
+	)
+	flag.Parse()
+
+	if len(flag.Args()) != 1 {
 		if Build == "" {
 			Version = "dev"
 			Build = time.Now().UTC().Format(time.RFC3339)
 		}
 		fmt.Printf("%s %s\n", Version, Build)
-		fmt.Printf("usage: %s <csv_path>\n", os.Args[0])
+		fmt.Printf("usage: %s [--timestamp] <csv_path>\n", os.Args[0])
 		os.Exit(1)
 	}
-	csvPath, err := filepath.Abs(os.Args[1])
+	csvPath, err := filepath.Abs(flag.Args()[0])
 	if err != nil {
-		log.Fatal("resolve absolute path for '%s': %v", os.Args[1], err)
+		log.Fatal("resolve absolute path for '%s': %v", flag.Args()[0], err)
 	}
 	ctx := context.WithValue(context.Background(), "log", log)
 	ctx, cancel := context.WithCancel(ctx)
@@ -67,7 +81,7 @@ func main() {
 	}()
 
 	// open csv for read
-	f, err := os.Open(os.Args[1])
+	f, err := os.Open(csvPath)
 	if err != nil {
 		log.Fatal("open csv: %v", err)
 	}
@@ -95,14 +109,29 @@ func main() {
 		}
 	}()
 
-	// create table, prepare insert
-	var insert *sql.Stmt
-	headers := make([]string, 0, 32)
-
+	// Read headers
 	rec, err := r.Read()
 	if err != nil && err != io.EOF {
 		log.Fatal("read csv '%s': %v", csvPath, err)
 	}
+
+	// Prepare args with timestamp, as necessary.
+	argsLen := len(rec)
+	if WithTimestamp {
+		argsLen++
+	}
+	args := make([]any, argsLen)
+	if WithTimestamp {
+		args[0] = any(FormatRFC3339Subsec(time.Now().UTC()))
+	}
+
+	// create table, prepare insert
+	var insert *sql.Stmt
+	headers := make([]string, 0, argsLen)
+
+	if WithTimestamp {
+		headers = append(headers, "timestamp")
+	}
 	for _, f := range rec {
 		f = scrubName(f)
 		headers = append(headers, f)
@@ -162,6 +191,7 @@ func main() {
 			}
 			break
 		}
+
 		// Rather slow, doing this for every record, but since
 		// we cannot know how many fields will exist ahead of
 		// time (we compile *now*, not after we've read the
@@ -169,11 +199,17 @@ func main() {
 		// each converted to `any`, by hand. It may be faster to
 		// simply print the SQL statements and pipe them to
 		// sqlite. Should check this.
-		args := make([]any, len(rec))
-		for j := 0; j < len(rec); j++ {
-			args[j] = any(rec[j])
+
+		recArgs := args[:]
+		if WithTimestamp {
+			// Skip over timestamp
+			recArgs = args[1:]
+		}
+		for j := 0; j < len(recArgs); j++ {
+			recArgs[j] = any(rec[j])
 		}
-      	// For explanation of extended codes, see https://www.sqlite.org/rescode.html
+
+		// For explanation of extended codes, see https://www.sqlite.org/rescode.html
 		_, err = insert.ExecContext(ctx, args...)
 		if err != nil {
 			var sqliteErr sqlite3.Error
@@ -272,4 +308,16 @@ func scrubName(s string) string {
 	s = strings.TrimSuffix(s, "_")
 
 	return fmt.Sprintf("'%s'", s)
-}
+}
+
// FormatRFC3339Subsec formats t as an RFC 3339 timestamp in UTC with exactly
// three fractional-second digits, e.g. "2006-01-02T15:04:05.000Z".
//
// t is converted to UTC before formatting, so the trailing "Z" is always
// accurate regardless of t's location. Sub-millisecond precision is
// truncated, not rounded.
//
// A fixed-width layout (".000") is used instead of slicing the output of
// time.RFC3339Nano: that layout (".999999999") drops trailing zeros, so the
// fractional part may be shorter than three digits or missing entirely,
// which makes fixed-offset slicing panic or produce malformed output.
func FormatRFC3339Subsec(t time.Time) string {
	return t.UTC().Format("2006-01-02T15:04:05.000Z")
}