/*!***************************************************************************
*!
*! FILE NAME  : train_mod.c
*!
*! DESCRIPTION: Synchronous serial driver module for model railway train
*!              control, for the ETRAX 100LX chip by Axis Communications AB.
*!              Designed to work with Linux version 2.6.19 and Axis SDK
*!              version 2.10. See http://developer.axis.com/ .
*!
*!              Intended for use with the Acme systems FOX board, see
*!              http://www.acmesystems.it/ , but it should work on other
*!              ETRAX 100LX based platforms as well.
*!
*!              The module is accessed from the user application through
*!              the open, write and close functions. The module uses the
*!              syncser1 device (major 125, minor 1) by default, but it
*!              is possible to select syncser0 (major 125, minor 0) instead,
*!              or both. This is selected with module parameters at module
*!              installation. The syncser0 device corresponds to sync. serial
*!              port 1 on ETRAX 100LX, and the syncser1 device corresponds
*!              to sync. serial port 3. Only the sync. serial port 3 is useful
*!              on the FOX board, since the pins for sync. serial port 1 are
*!              used for USB.
*!
*!              Each write command should contain from 1 to 32 bytes, where
*!              the first byte is a command byte, and the rest is packet
*!              data or command parameters. Erroneous or too long commands
*!              will be silently consumed. The write command times out
*!              and returns 0 if the transmitter is busy for more than
*!              one jiffy. The application should then make a retry.
*!
*!              The following commands are defined (in train_mod.h) for
*!              the moment:
*!
*!              train_cmd_nop        No operation.
*!
*!              train_cmd_dcc        DCC train command. The command consists
*!                                   of 3 to 6 bytes. Byte 0 is the command
*!                                   code, and the following two to five bytes
*!                                   contain the train command data. The train
*!                                   command is sent msb first and byte 1
*!                                   first. The preamble, start bits, error
*!                                   detection byte and packet end bits are
*!                                   added by the driver and should not be
*!                                   included in the command.
*!
*!                                   The DCC packet is transmitted repeatedly
*!                                   until a new command is given.
*!
*!              train_cmd_mfx        Märklin/ESU mfx train command. The command
*!                                   consists of 4 to 10 bytes, and sends one
*!                                   mfx packet. Byte 0 is the command code.
*!                                   Byte 1 contains the length of the packet
*!                                   (in number of bits, excluding flags, CRC
*!                                   and bit stuffing). The following two to
*!                                   eight bytes contain the train command
*!                                   data. The data is sent msb first within
*!                                   bytes. Any remaining bits in the last byte
*!                                   of the command are ignored. The start and
*!                                   end flags as well as the CRC and bit
*!                                   stuffing are added by the driver and
*!                                   should not be included in the command.
*!
*!                                   The mfx packet is transmitted repeatedly
*!                                   until a new command is given.
*!
*!              train_cmd_mfx_once   Similar to 'train_cmd_mfx' but the
*!                                   command is not repeated. Instead, a
*!                                   Märklin/Motorola idle packet is sent
*!                                   repeatedly after the command is completed
*!                                   until a new command is given.
*!
*!                                   This command also allows several
*!                                   concatenated mfx packets, up to a maximum
*!                                   total command size of 32 bytes. Each mfx
*!                                   packet starts with one byte containing
*!                                   the packet length, and the following bytes
*!                                   in each packet contain the packet data.
*!
*!              train_cmd_mm         Märklin/Motorola train command.
*!                                   The command consists of 4 bytes. Byte 0
*!                                   is the command code, and the following
*!                                   three bytes contain the train command
*!                                   data. The train command is sent lsb first
*!                                   and byte 1 first. A Märklin/Motorola
*!                                   packet is 18 bits, so the 6 upper bits of
*!                                   byte 3 are ignored.
*!
*!                                   The Märklin/Motorola packet is transmitted
*!                                   repeatedly until a new command is given.
*!
*!              train_cmd_mmd        Märklin/Motorola accessory command.
*!                                   Similar to train_cmd_mm but data is
*!                                   sent at double speed.
*!
*!              train_cmd_mm_pause   Märklin/Motorola pause configuration.
*!                                   The command consists of 3 bytes. Byte 0
*!                                   is the command code and the two following
*!                                   bytes form a 16-bit pause value, with
*!                                   byte 1 as the LSB and byte 2 as the MSB.
*!                                   The value should be given in units of
*!                                   17.36 us. Standard pause values for the
*!                                   Märklin/Motorola protocol are defined in
*!                                   train_mod.h .
*!
*!              For information about the Märklin/Motorola format, see
*!              http://spazioinwind.libero.it/scorzoni/motorola.htm
*!
*!              For information about the Märklin/ESU mfx format, see
*!              http://www.alice-dsl.net/mue473/mfxmenue.htm
*!
*!              For information about the DCC format, see
*!              http://www.nmra.org/standards/DCC/standards_rps/DCCStds.html
*!
*!              Known problems:
*!              ===============
*!
*!              1. Since there is no system-wide shadow register in the
*!                 Axis SDK 2.10 for the R_SYNC_SERIAL_PRESCALE register,
*!                 it is not possible to let this driver coexist with another
*!                 driver for the other sync. serial port. This would be
*!                 difficult anyway, since some configurations are common to
*!                 both ports.
*!
*! Version: 0.0, 2007-01-27:   - Initial version, not tested.
*!
*! Version: 0.1, 2007-01-29:   - Cleaned up the write function by hiding
*!                               the details in subroutines.
*!
*!                             - Added a pair of protocol independent
*!                               "done" DMA descriptors to simplify
*!                               transitions between different protocols.
*!
*!                             - Done some initial testing.
*!
*!                             - Added the train_cmd_mmd command. Only
*!                               checked with oscilloscope since I do not
*!                               have any MM-compatible accessories.
*!
*! Version: 0.2, 2007-02-12:   - Added support for DCC. Very little tested.
*!
*! Version: 0.3, 2007-05-29:   - Moved to Linux 2.6.19 and Axis SDK 2.10.
*!
*! Version: 0.4, 2007-09-08:   - Updated the link to the mfx protocol
*!                               description. No functional change.
*!
*! Version: 0.5, 2008-09-24:   - Added some (not complete) mfx support.
*!
*! ---------------------------------------------------------------------------
*!
*! (C) Copyright 2007, 2008 Per Zander, SWEDEN http://home.swipnet.se/perz/
*!
*! ---------------------------------------------------------------------------
*!
*!    This program is free software; you can redistribute it and/or modify
*!    it under the terms of the GNU General Public License as published by
*!    the Free Software Foundation; either version 2 of the License, or
*!    (at your option) any later version.
*!
*!    This program is distributed in the hope that it will be useful,
*!    but WITHOUT ANY WARRANTY; without even the implied warranty of
*!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*!    GNU General Public License for more details.
*!
*!    To have a copy of the GNU General Public License write to the Free
*!    Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
*!    02110-1301  USA.
*!
*!***************************************************************************/

//------------------------------------------------------------------
// Includes.
//------------------------------------------------------------------

//----- Linux includes. -----*

#include <linux/init.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kdev_t.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <asm/uaccess.h>
#include <asm/arch/io_interface_mux.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/dma.h>

//----- Application include file. -----*

#include "train_mod.h"

//------------------------------------------------------------------
// Defines.
//------------------------------------------------------------------

//----- Device nodes. -----*

// Default values for the 'major' and 'name' module parameters.

#define SYNCSER_MAJOR 125
#define SYNCSER_NAME "syncser"

//----- Data output polarity. -----*

// Set it to 1 if you have an external inverter on the synchronous serial
// data output. Set it to 0 otherwise.
//
// When set, the pattern bytes generated by init_ser_data() are stored
// complemented and the default output level in sser_ctrl_val is high
// instead of low.

#define TRAIN_EXTERNAL_DATA_INVERTER 1

//----- USB port 1 allocation. -----*

// Set USB1_ALLOCATED to 1 if your kernel is configured to use the usb1 port.
// This is typically true with the precompiled kernels for the FOX board.
// Set it to 0 otherwise.

#define USB1_ALLOCATED 1

//----- Write command timeout. -----*

// Write command timeout, in number of jiffies.

#define TRAIN_WR_TIMEOUT 1

//----- Protocol independent pause. -----*

// Length of the protocol independent part of the pause between packets.
// Given in number of bytes i.e. in units of 17.36 us (one byte is 8 bits
// at the sync. serial baud rate of 460.8 k).

#define TRAIN_PACKET_DONE_PAUSE 4

//----- DCC parameters. -----*

// DCC idle packet.

#define TRAIN_DCC_IDLE_PACKET 0xff00ff

// Length of DCC 'end of packet' pattern, in units of 17.36 us.
// Must be between 7 and 12. The actual pattern is 6.75 bytes long,
// anything above that adds to the inter-packet gap.

#define TRAIN_DCC_END_LENGTH 8

//----- mfx parameters. -----*

// Parameters for the mfx CRC calculation.
// NOTE(review): presumably the CRC start value and generator polynomial;
// the CRC routine itself is not in this part of the file - confirm there.

#define INIT_CRC 0x5e
#define CRC_POLY 0x1c0

//------------------------------------------------------------------
// Prototypes.
//------------------------------------------------------------------

// Forward declarations. The open, release and write handlers are needed
// before their definitions because the train_fops table refers to them.

// DMA descriptor interrupt handler.
static irqreturn_t tr_interrupt(int irq, void * dev_id);

// File operation handlers installed in train_fops.
int train_open (struct inode * my_inode, struct file * my_file);
int train_release (struct inode * my_inode, struct file * my_file);
ssize_t train_write (struct file * my_file, const char __user * my_data,
                 size_t my_size, loff_t * offp);

// Module clean-up.
static void train_exit(void);

//------------------------------------------------------------------
// Data type definitions.
//------------------------------------------------------------------

//----- Code violation patterns for generation of mfx flags. -----*

// The special patterns are numbered directly after the 16 ordinary 4-bit
// data patterns (0-15), so a value can be used as an index into the mfx
// pattern pair table. 'v' marks a code violation bit; the patterns are
// written in the order they are named, and transmitted lsb first.

enum mfx_cv_pattern_t {
  cv_00v1 = 16,   // Pattern pair 16.
  cv_0vv1,        // Pattern pair 17.
  cv_10v1,        // Pattern pair 18.
  cv_v100,        // Pattern pair 19.
  cv_v101,        // Pattern pair 20.
  cv_vv10         // Pattern pair 21.
};

//----- Structure for port specific data. -----*

// Used by open and release functions.

// Filled in by get_dma_irq_reg_info() for one of the two ports.

struct train_dma_irq_reg_info {
  unsigned int dma_irq_nr;          // IRQ number of the TX DMA channel.
  unsigned int dma_nr;              // TX DMA channel number.
  enum dma_owner owner;             // DMA owner id (dma_ser1 or dma_ser3).
  const char * dma_irq_descr;       // Description string for DMA and IRQ.
  volatile u32 * sser_ctrl;         // R_SYNC_SERIALn_CTRL register.
  volatile u32 * dma_first;         // R_DMA_CHn_FIRST register.
  volatile u8 * dma_cmd;            // R_DMA_CHn_CMD register.
  const volatile u8 * dma_status;   // R_DMA_CHn_STATUS register (read only).
  volatile u8 * clr_intr;           // R_DMA_CHn_CLR_INTR register.
  unsigned int mask_bit;            // Bit number of the channel's descriptor
                                    // interrupt in R_IRQ_MASK2_SET.
};

//----- Packet data received from the user application. -----*

// The first byte is a command byte and the rest is data, but it does not
// make sense to make that distinction in the structure definition.

struct train_packet_data {
  unsigned char data[32];   // Sized for the 32 byte maximum command length.
};

//----- Structures containing the DMA descriptor lists. -----*

// Märklin/Motorola half data packet descriptors. A Märklin/Motorola
// half data packet consists of 18 data bits. Each DMA descriptor handles
// 3 bits, so we need 6 descriptors.

struct train_mm_dma_descr_data {
  struct etrax_dma_descr d[6];   // 6 descriptors of 3 bits = 18 data bits.
};

// Märklin/Motorola complete packet including pauses. We need two half
// data packets, one intra packet gap descriptor (t1) and two inter packet
// pause descriptors, one with the descriptor interrupt set (tr) and one
// without interrupt (t2).

struct train_mm_dma_descr {
  struct train_mm_dma_descr_data p[2];   // The two half data packets.
  struct etrax_dma_descr t1;             // Intra packet gap.
  struct etrax_dma_descr t2;             // Inter packet pause, no interrupt.
  struct etrax_dma_descr tr;             // Inter packet pause, with interrupt.
};

// DCC packet descriptors. We need two descriptors for initial pauses, one (tr)
// with interrupt set, and one (t1) without interrupt. Then we need one
// descriptor for the preamble and 12 descriptors for the data (2 descriptors
// per byte). Finally one descriptor (e) for packet end.

struct train_dcc_dma_descr {
  struct etrax_dma_descr tr;      // Initial pause, with interrupt.
  struct etrax_dma_descr t1;      // Initial pause, no interrupt.
  struct etrax_dma_descr p;       // Preamble.
  struct etrax_dma_descr d[12];   // Data, two descriptors per byte.
  struct etrax_dma_descr e;       // Packet end.
};

// Descriptors for mfx packets. We need two descriptors for the initial pause,
// one (tr) with interrupt set, and one (t1) without interrupt. Then we need
// one descriptor for the initial flag and up to 72 descriptors for the mfx
// packets. There can be several mfx packets within one command from the
// application. Finally one descriptor (e) for the end of the last mfx packet.

struct train_mfx_dma_descr {
  struct etrax_dma_descr tr;      // Initial pause, with interrupt.
  struct etrax_dma_descr t1;      // Initial pause, no interrupt.
  struct etrax_dma_descr f;       // Initial flag.
  struct etrax_dma_descr d[72];   // Data for one or more mfx packets.
  struct etrax_dma_descr e;       // End of the last mfx packet.
};

// All descriptors are gathered in one struct for convenience.
// First we have two descriptors that mark the transition between commands.
// Then there are four complete Märklin/Motorola commands, two for normal
// speed (train) commands and two for double speed (accessory) commands.
// Thereafter two complete DCC commands followed by two mfx commands.

struct train_dma_descr_pool {
  struct etrax_dma_descr done[2];      // Transition between commands.
  struct train_mm_dma_descr mm[4];     // Two normal speed and two double
                                       // speed Märklin/Motorola commands.
  struct train_dcc_dma_descr dcc[2];   // Two DCC commands.
  struct train_mfx_dma_descr mfx[2];   // Two mfx commands.
};

//----- Packet data patterns. -----*

// Märklin/Motorola data pattern struct. Märklin/Motorola packets are sent
// in chunks of 3 bits. Each bit is 0.208 ms long. With a sync. serial baud
// rate of 460.8 k three Märklin/Motorola bits correspond to 36 bytes of
// DMA buffer data.

struct train_mm_3_bits {
  unsigned char data[3 * 12];   // 3 MM bits, 12 pattern bytes per bit.
};

// Märklin/Motorola double speed data pattern struct. Each bit is 0.104 ms
// long so it requires 18 bytes.

struct train_mmd_3_bits {
  unsigned char data[3 * 6];   // 3 MM bits, 6 pattern bytes per bit.
};

// DCC data pattern struct. We use chunks of 4 bits, plus the start bit for the
// even nibbles. Since the bits have variable length we need to have an index
// into the pattern table instead of defining each pattern as a vector element.
// The 17th idx element defines the end of the '1111' pattern.

struct dcc_patterns {
  // Start offset in data[] of each of the 16 nibble patterns, plus one
  // final entry marking the end of the last ('1111') pattern.
  int idx[16 + 1];

  // Pattern bytes for all 16 nibbles, followed by the preamble bytes.
  unsigned char data[864];
};

// Märklin mfx data pattern struct. We use chunks of 4 bits. Each bit is 0.1
// ms long. With a sync. serial baud rate of 460.8 k a four bit chunk requires
// 23 bytes of DMA buffer data.

struct mfx_4_bits {
  unsigned char data[(4 * 46) / 8];   // 4 mfx bits of 46 serial bits each.
};

// Each mfx data pattern is needed in two versions, one with data initially
// high and one with data initially low.

struct mfx_4_bit_pair {
  struct mfx_4_bits p[2];   // The two starting polarities of one pattern.
};

// Gather all packet data patterns into one struct.
//
// To represent all possible combinations of 3-bit Märklin/Motorola data,
// we need 8 different data patterns. Then we also need a buffer containing
// the maximum length Märklin/Motorola pause. This buffer is used by the
// DCC and mfx protocols as well.
//
// The DCC patterns are all in one instance of the dcc_patterns struct.
//
// To represent all possible combinations of normal 4-bit mfx data, we need 16
// different data pattern pairs. Then we also need 6 special data pattern
// pairs for generation of the mfx packet start and end flags. So, in total,
// we need 22 pattern pairs for mfx.
//
// Some extra bytes are reserved for implementation of other protocols.

struct train_ser_data {
  struct train_mm_3_bits mm_data[8];     // Normal speed MM, all 3-bit values.
  struct train_mmd_3_bits mmd_data[8];   // Double speed MM, all 3-bit values.
  unsigned char mm_pause[TRAIN_MM_MAX_PACKET_GAP];  // Pause, all protocols.
  struct dcc_patterns dcc_pattern;       // DCC nibble patterns and preamble.
  struct mfx_4_bit_pair mfx_data[22];    // 16 data + 6 code violation pairs.
  unsigned char data[660];               // Reserved for other protocols.
};

//----- Device struct. -----*

// Per channel driver state. One instance per channel is kept in the
// train_devices array.

struct train_dev {
  struct train_dma_descr_pool * descr_list; // Pointer to start of DMA descr.
  int ref_cnt;                              // Count open - release.
  int nr;                                   // Which serial port is used?
  struct cdev my_cdev;                      // Set up by train_setup_cdev().
  struct semaphore sem;                     // To protect open and release.
  struct completion * cmpl;                 // To synchronize write and irq.
  int descr_list_nr;                        // Which list is free for write?
};

//----- Initialization progress levels. -----*

// The numeric values are spelled out; they are the same values the compiler
// would assign implicitly. train_init_level starts out as train_init_none.
// NOTE(review): the order presumably mirrors the set-up/tear-down sequence;
// confirm against the module init and exit code.

enum train_init_t {
  train_init_success = 0,
  train_init_dma1    = 1,
  train_init_cdev0   = 2,
  train_init_dma0    = 3,
  train_init_started = 4,
  train_init_ports   = 5,
  train_init_port1   = 6,
  train_init_region  = 7,
  train_init_none    = 8
};

//------------------------------------------------------------------
// Declarations.
//------------------------------------------------------------------

//----- Module parameters. -----*

// The values below are the defaults. All parameters are registered with
// S_IRUGO permissions.

static int major = SYNCSER_MAJOR;   // Device major number.
static int minor = 1;               // Device minor number.
static int sser1 = 0;               // Presumably: use sync. serial port 1
                                    // (syncser0) - confirm in the init code.
static int sser3 = 1;               // Presumably: use sync. serial port 3
                                    // (syncser1) - confirm in the init code.
static char * name = SYNCSER_NAME;  // Device name.
static int sticky = 0;              // See the comment at module_param below.

module_param(major, int, S_IRUGO);
module_param(minor, int, S_IRUGO);
module_param(sser1, int, S_IRUGO);
module_param(sser3, int, S_IRUGO);
module_param(name, charp, S_IRUGO);
module_param(sticky, int, S_IRUGO); // If set, the transmission is started
                                    // already at module installation.
                                    // Otherwise at first open.

//----- Module variables. -----*

// Base device number. train_setup_cdev() adds the port index to its minor
// number to form the device number of each channel.
static dev_t dev = MKDEV(0, 0);

//----- File operations struct. -----*

// Only open, release and write are provided; all other file operations
// are left unset.

static struct file_operations train_fops = {
  .owner   = THIS_MODULE,
  .open    = train_open,
  .release = train_release,
  .write   = train_write,
};

//----- Some general variables. -----*

static struct train_dev train_devices[2];         // One per channel.

// NOTE(review): presumably the base of the memory area that holds the DMA
// descriptors and the pattern data - confirm in the init code.
static void * dma_mem_base;                       

// Pattern buffer. Filled in by init_ser_data() and referenced by the DCC
// helpers and by the DMA descriptor set-up.
static struct train_ser_data * ser_data;

// How far the module initialization has come. Nothing is done initially.
static enum train_init_t train_init_level = train_init_none;

//----- Configuration data for the synchronous serial port. -----*

// We can use R_SYNC_SERIAL1_CTRL values for both channels since both
// channels have the same control register layout.

// The port is set up as a master output fed by DMA at 460.8 kbaud, sending
// 8 bit words lsb first with the receiver disabled. The default output
// level follows TRAIN_EXTERNAL_DATA_INVERTER: high with an external
// inverter, low without.

static const u32 sser_ctrl_val =
                  IO_STATE(R_SYNC_SERIAL1_CTRL, tr_baud, c460k8Hz) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, dma_enable, on) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, mode, master_output) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, error, ignore) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, rec_enable, disable) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, f_synctype, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, f_syncsize, bit) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, f_sync, on) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, clk_mode, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, clk_halt, running) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, bitorder, lsb) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, wordsize, size8bit) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, buf_empty, lmt_8) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, buf_full, lmt_32) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, flow_ctrl, disabled) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, clk_polarity, pos) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, frame_polarity, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, status_polarity, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, clk_driver, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, frame_driver, normal) |
                  IO_STATE(R_SYNC_SERIAL1_CTRL, status_driver, normal) |
                  (TRAIN_EXTERNAL_DATA_INVERTER ?
                  IO_STATE(R_SYNC_SERIAL1_CTRL, def_out0, high) :
                  IO_STATE(R_SYNC_SERIAL1_CTRL, def_out0, low));

// These must be static since the de-allocation of the DMA channel
// tries to match the pointer, not the string itself.

// Handed out through get_dma_irq_reg_info(): ser1 for port number 0 and
// ser3 for any other port number.
static const char * ser1_dma_irq_descr = "serial 1 dma tr";
static const char * ser3_dma_irq_descr = "serial 3 dma tr";

//----- Completion structures. -----*

// NOTE(review): presumably one completion per channel, reached through the
// 'cmpl' pointer in struct train_dev - confirm where 'cmpl' is assigned.
DECLARE_COMPLETION(cmpl0);
DECLARE_COMPLETION(cmpl1);

//------------------------------------------------------------------
// Local functions.
//------------------------------------------------------------------

//----- Get train_dev structure from port number. -----*

// Returns the train_devices entry for port number 'nr'. The slot is the
// port number masked with the sser1 module parameter, so with sser1 == 0
// slot 0 is always used, and with sser1 == 1 the slot is the low bit of nr.

static inline struct train_dev * get_train_dev(unsigned int nr)
{
  const unsigned int slot = sser1 & nr;

  return train_devices + slot;
}

//----- Get DCC data pattern length. -----*

// Returns the number of pattern bytes needed to send one half of 'data'.
// part == 0 selects the high nibble, which is sent with its start bit.
// Any other value selects the low nibble, which is sent without the
// 12 byte start bit that precedes every stored pattern.

static inline u16 get_dcc_length(unsigned char data, int part)
{
  const int * const idx = ser_data->dcc_pattern.idx;
  const unsigned int nibble = (part ? data : (data >> 4)) & 0xf;
  const int length = idx[nibble + 1] - idx[nibble];

  return part ? length - 12 : length;
}

//----- Get DCC data pattern. -----*

// Returns the physical address of the pattern bytes for one half of 'data'.
// part == 0 selects the high nibble and includes the start bit. Any other
// value selects the low nibble and skips the 12 byte start bit.

static inline u32 get_dcc_pattern(unsigned char data, int part)
{
  const unsigned int nibble = (part ? data : (data >> 4)) & 0xf;
  const int skip = part ? 12 : 0;
  const int first = ser_data->dcc_pattern.idx[nibble] + skip;

  return virt_to_phys(ser_data->dcc_pattern.data + first);
}

//----- Get port specific info about irq, dma and reg addresses. -----*

// Fills in 'info' with the interrupt, DMA channel and register addresses
// of one port. nr == 0 selects sync. serial port 1 (DMA channel 8), any
// other value selects sync. serial port 3 (DMA channel 4).

static inline void get_dma_irq_reg_info(struct train_dma_irq_reg_info * info,
                                        int nr)
{
  if (nr == 0) {

    // Sync. serial port 1.

    info->dma_irq_nr    = SER1_DMA_TX_IRQ_NBR;
    info->dma_nr        = SER1_TX_DMA_NBR;
    info->owner         = dma_ser1;
    info->dma_irq_descr = ser1_dma_irq_descr;
    info->sser_ctrl     = R_SYNC_SERIAL1_CTRL;
    info->dma_first     = R_DMA_CH8_FIRST;
    info->dma_cmd       = R_DMA_CH8_CMD;
    info->dma_status    = R_DMA_CH8_STATUS;
    info->clr_intr      = R_DMA_CH8_CLR_INTR;
    info->mask_bit      = IO_BITNR(R_IRQ_MASK2_SET, dma8_descr);
    return;
  }

  // Sync. serial port 3.

  info->dma_irq_nr    = SER3_DMA_TX_IRQ_NBR;
  info->dma_nr        = SER3_TX_DMA_NBR;
  info->owner         = dma_ser3;
  info->dma_irq_descr = ser3_dma_irq_descr;
  info->sser_ctrl     = R_SYNC_SERIAL3_CTRL;
  info->dma_first     = R_DMA_CH4_FIRST;
  info->dma_cmd       = R_DMA_CH4_CMD;
  info->dma_status    = R_DMA_CH4_STATUS;
  info->clr_intr      = R_DMA_CH4_CLR_INTR;
  info->mask_bit      = IO_BITNR(R_IRQ_MASK2_SET, dma4_descr);
}

//----- Set up cdev structure. -----*

static int train_setup_cdev (struct train_dev * my_dev, int nr)
{
  int err_no;
  dev_t devno;
  
  devno = MKDEV(MAJOR(dev), MINOR(dev) + nr);
  cdev_init(&my_dev->my_cdev, &train_fops);
  my_dev->my_cdev.owner = THIS_MODULE;
  err_no = cdev_add(&my_dev->my_cdev, devno, 1);
  if (err_no) printk(KERN_ALERT "Error: %d adding train_mod%d\n", err_no, nr);
  return err_no;
}

//----- Initialize serial data patterns. -----*

// Fills in all pattern buffers in 'data': the normal and double speed
// Märklin/Motorola patterns, the common pause buffer, the DCC nibble
// patterns with preamble, and the mfx pattern pairs. Every pattern is
// stored complemented when TRAIN_EXTERNAL_DATA_INVERTER is set.
//
// data: the pattern buffer to initialize. All members except the reserved
//       'data' array at the end are written.

static void init_ser_data (struct train_ser_data * data)
{
  int i, j, k, n, b;
  signed char s;
  unsigned char v;
  unsigned char cv[6];

  // Define the mfx code violation patterns. bit n in each value indicates if
  // there will be a data toggle after the n:th mfx half bit in the pattern.

  cv[cv_00v1 - 16] = 0xad;
  cv[cv_0vv1 - 16] = 0xb5;
  cv[cv_10v1 - 16] = 0xed;
  cv[cv_v100 - 16] = 0xda;
  cv[cv_v101 - 16] = 0xdb;
  cv[cv_vv10 - 16] = 0xd6;
  
  // Initialize Märklin/Motorola (MM) data patterns. Each data pattern
  // represents a 3-bit MM data sequence. There are 8 different patterns
  // to cover all possible 3-bit combinations.
  //
  // A MM bit is 0.208 ms long, i.e., 0.208 * 460.8 / 8 = 12 pattern bytes.
  //
  // A MM '0' is coded as 26 us (12 pattern bits) high followed by 182 us
  // (84 pattern bits) low.
  // A MM '1' is coded as 182 us (84 pattern bits) high followed by 26 us
  // (12 pattern bits) low.
  //
  // Data from the user application (MM commands) are sent lsb first.
  // The synchronous serial port sends pattern bytes lsb first.

  for (i = 0; i < 8; i++) {
    for (j = 0; j < 3; j++) {

      // b is the number of completely high bytes. Byte b itself holds the
      // remaining 4 high bits.

      b = ((i >> j) & 1) ? 10 : 1;
      for (k = 0; k < 12; k++) {
        v = (k < b) ? 0xff : ((k == b) ? 0x0f : 0);
        v = TRAIN_EXTERNAL_DATA_INVERTER ? ~v : v;
        data->mm_data[i].data[k + 12 * j] = v;
      }
    }
  }

  // The same for double speed data, where each MM bit is 0.104 ms long,
  // i.e., 6 pattern bytes.
  //
  // A double speed '0' is 13 us (6 pattern bits) high followed by 91 us
  // (42 pattern bits) low, and a '1' is 91 us high followed by 13 us low.
  //
  // b is the number of completely high bytes. Byte b holds the remaining
  // high bits: 6 bits for a '0' and 2 bits for a '1'. All bytes after
  // byte b must be low. In particular, the last byte of a '0' must not get
  // the two high bits that end a '1'.

  for (i = 0; i < 8; i++) {
    for (j = 0; j < 3; j++) {
      b = ((i >> j) & 1) ? 5 : 0;
      for (k = 0; k < 6; k++) {
        v = (k < b) ? 0xff : ((k == b) ? (b ? 0x03 : 0x3f) : 0);
        v = TRAIN_EXTERNAL_DATA_INVERTER ? ~v : v;
        data->mmd_data[i].data[k + 6 * j] = v;
      }
    }
  }

  // Initialize Märklin/Motorola pause. Also used by DCC and mfx.

  v = TRAIN_EXTERNAL_DATA_INVERTER ? 0xff : 0;
  for (i = 0; i < TRAIN_MM_MAX_PACKET_GAP; i++) {
    data->mm_pause[i] = v;
  }

  // Initialize DCC patterns. Each data pattern represents a 4-bit DCC
  // data sequence. There are 16 patterns to cover all possible 4-bit
  // combinations. Each data pattern is preceded by a DCC '0' pattern
  // which can be used as a start bit. At the end, the pattern for
  // DCC '1111' (bin) is repeated 4 times (without any start bits in between)
  // to form the DCC preamble sequence.
  //
  // A DCC '1' bit should be 58 us low followed by 58 us high. The actual
  // time is 27 sync. serial bits for each half, i.e., 58.6 us. This is
  // well within the specified tolerance of +/- 3 us.
  //
  // A DCC '0' bit should be 100 us low followed by 100 us high. We use
  // 46, 47 or 48 sync. serial bits, i.e. 99.8, 102.0 or 104.2 us. The
  // different lengths are chosen to make the different data patterns
  // byte aligned. This kind of variation is explicitly allowed by the DCC
  // standard.
  //
  // The DCC protocol is actually polarity independent but we keep polarity
  // consistent with the MM protocol to avoid unexpected effects when
  // alternating between the protocols.
  //
  // In the loops below, j is the byte offset within the current pattern,
  // n is the number of serial bits already placed in the byte being built
  // and v is that byte.

  data->dcc_pattern.idx[0] = 0;
  for (i = 0; i < 16; i++) {

    // Start bit. 6 bytes (48 serial bits) for each half.

    v = TRAIN_EXTERNAL_DATA_INVERTER ? 0xff : 0;
    for (j = 0; j < 6; j++) {
      data->dcc_pattern.data[j + data->dcc_pattern.idx[i]] = v;
    }
    for ( ; j < 12; j++) {
      data->dcc_pattern.data[j + data->dcc_pattern.idx[i]] = ~v;
    }

    // Actual data pattern, msb first. Each DCC bit is added as two halves.
    // When a half ends inside a byte, the bits from position n and upwards
    // are flipped to the level of the next half.

    n = 0;
    for (k = 3; k >= 0; k--) {
      n += (i == 0) ? 46 : (((i >> k) & 1) ? 27 : 47);
      while (n >= 8) {
        n -= 8;
        data->dcc_pattern.data[j + data->dcc_pattern.idx[i]] = v;
        v = TRAIN_EXTERNAL_DATA_INVERTER ? 0xff : 0;
        j++;
      }

      v = v ^ (0xff << n);
      n += (i == 0) ? 46 : (((i >> k) & 1) ? 27 : 47);
      while (n >= 8) {
        n -= 8;
        data->dcc_pattern.data[j + data->dcc_pattern.idx[i]] = v;
        v = TRAIN_EXTERNAL_DATA_INVERTER ? 0 : 0xff;
        j++;
      }
      v = v ^ (0xff << n) ;
    }
    data->dcc_pattern.idx[i + 1] = data->dcc_pattern.idx[i] + j;
  }

  // Add preamble as a continuation of the pattern for '1111' (bin).
  // This relies on j, n and v still having the values they had when the
  // last pattern (i == 15) was completed in the loop above.

  for (i = 0; i < 3; i++) {
    for (k = 3; k >= 0; k--) {
      n += 27;
      while (n >= 8) {
        n -= 8;
        data->dcc_pattern.data[j + data->dcc_pattern.idx[15]] = v;
        v = TRAIN_EXTERNAL_DATA_INVERTER ? 0xff : 0;
        j++;
      }

      v = v ^ (0xff << n);
      n += 27;
      while (n >= 8) {
        n -= 8;
        data->dcc_pattern.data[j + data->dcc_pattern.idx[15]] = v;
        v = TRAIN_EXTERNAL_DATA_INVERTER ? 0 : 0xff;
        j++;
      }
      v = v ^ (0xff << n) ;
    }
  }

  // Initialize mfx patterns. Each data pattern represents a 4-bit mfx data
  // sequence. One mfx bit is 100 us, which corresponds to 46 bits of
  // serial data. Each pattern is therefore 23 bytes long.
  // An mfx '0' contains an initial data toggle, and thereafter a stable
  // level. An mfx '1' contains an initial data toggle plus a data toggle
  // in the middle of the bit (i.e., after 23 serial data bits).
  //
  // Each data pattern exists with positive (p[0]) and negative
  // (p[1]) starting edge. The first 16 pattern pairs represent all possible
  // 4-bit combinations of normal data, while pairs 16-21 represent all
  // necessary 4-bit combinations containing code violations for generation
  // of the mfx start and end flags.
  //
  // We define a "code violation" to be a '1' bit without an initial polarity
  // change. We need the following patterns, where the code violation bits
  // are marked 'v' (patterns transmitted lsb first):
  //
  // Pattern pair 16: 00v1
  // Pattern pair 17: 0vv1
  // Pattern pair 18: 10v1
  // Pattern pair 19: v100
  // Pattern pair 20: v101
  // Pattern pair 21: vv10
  //
  // If a code violation is required only in the lsb it does not need a
  // special pattern. An ordinary data pattern with opposite to normal
  // polarity can be used instead. Also, we handle any remaining bits after
  // the last end flag by truncating the last pattern, so we do not need any
  // special patterns for this case either.
  //
  // In the loop below, v is the current data level and s is the byte to use
  // if an edge falls at the end of the current half bit. A half bit is 23
  // serial bits, so the edge position moves one bit down for each half bit.
  // This is tracked by shifting s right with sign fill.

  for (i = 0; i < 22; i++) {
    for (j = 0; j < 2; j++) {
      v = (TRAIN_EXTERNAL_DATA_INVERTER ^ j) ? 0 : 0xff;     // Initial level.
      s = (TRAIN_EXTERNAL_DATA_INVERTER ^ j) ? 0x80 : 0x7f;  // Edge byte.
      n = 0;
      for (k = 0; k < 8; k++) {                // Each pattern has 8 half bits.
        data->mfx_data[i].p[j].data[n++] = v;
        data->mfx_data[i].p[j].data[n++] = v;
        if (n < 23) {

          // Ordinary patterns always toggle at the end of a whole bit (odd
          // k) and toggle in the middle of bit k / 2 if that bit of i is
          // set. Code violation patterns use the toggle table instead.

          if (1 & ((i < 16) ? (k | (i >> (k >> 1))) : (cv[i - 16] >> k))) {
            data->mfx_data[i].p[j].data[n++] = (unsigned) s; // Insert an edge.
            s = ~s;                                          // Toggle data.
            v = ~v;
          }
          else {
            data->mfx_data[i].p[j].data[n++] = v;
          }
          s = s >> 1;              // Shift with sign fill, since s is signed.
        }
      }
    }
  }
}

//----- Initialize DMA descriptors. -----*

static void init_descr_list (struct train_dma_descr_pool * descr_list)
{
  int i, j, k, d;
  const u16 inter_gap = TRAIN_MM_INTER_PACKET_GAP - TRAIN_PACKET_DONE_PAUSE;
  const u16 max_gap = TRAIN_MM_MAX_PACKET_GAP - TRAIN_PACKET_DONE_PAUSE;
  const u16 dcc_gap = TRAIN_DCC_PACKET_GAP - TRAIN_PACKET_DONE_PAUSE;

  // Initialize the protocol independent "done" nodes to point to the normal
  // speed Märklin/Motorola lists.

  for (i = 0; i < 2; i++) {
    descr_list->done[i].sw_len = TRAIN_PACKET_DONE_PAUSE;
    descr_list->done[i].ctrl = 0;
    descr_list->done[i].next = virt_to_phys(&(descr_list->mm[i].t2));
    descr_list->done[i].buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->done[i].hw_len = 0;
    descr_list->done[i].status = 0;
    descr_list->done[i].fifo_len = 0;
  }

  // Initialize Märklin/Motorola data packet list.
  // The structure below shows the normal speed list structure.
  // A similar structure is set up for the double speed case, but the "done"
  // nodes which are common to all protocols are initialized to point into
  // the normal speed packets.
  //
  //  +--->mm[0].tr-->+
  //  |               |
  //  | +->mm[0].t2-->+->mm[0].p[0].d[0]-->mm[0].p[0].d[1]-->mm[0].p[0].d[2]--+
  //  | |                                                                     |
  //  | | +--mm[0].t1<--mm[0].p[0].d[5]<--mm[0].p[0].d[4]<--mm[0].p[0].d[3]<--+
  //  | | |
  //  | | +------------>mm[0].p[1].d[0]-->mm[0].p[1].d[1]-->mm[0].p[1].d[2]---+
  //  | |                                                                     |
  //  | +--<--done[0]<--mm[0].p[1].d[5]<--mm[0].p[1].d[4]<--mm[0].p[1].d[3]<--+
  //  | :
  //  | +->mm[1].tr-->+
  //  :               |
  //  +--->mm[1].t2-->+->mm[1].p[0].d[0]-->mm[1].p[0].d[1]-->mm[1].p[0].d[2]--+
  //  |                                                                       |
  //  |   +--mm[1].t1<--mm[1].p[0].d[5]<--mm[1].p[0].d[4]<--mm[1].p[0].d[3]<--+
  //  |   |
  //  |   +------------>mm[1].p[1].d[0]-->mm[1].p[1].d[1]-->mm[1].p[1].d[2]---+
  //  |                                                                       |
  //  +----<--done[1]<--mm[1].p[1].d[5]<--mm[1].p[1].d[4]<--mm[1].p[1].d[3]<--+

  for (i = 0; i < 4; i++) {
    for (j = 0; j < 2; j++) {
      d = TRAIN_MM_IDLE_PACKET;          // Start with the idle packet.
      for (k = 0; k < 6; k++) {
        descr_list->mm[i].p[j].d[k].sw_len = (i < 2) ? 36 :  // Normal speed.
                                                       18;   // Double speed.
        descr_list->mm[i].p[j].d[k].ctrl = 0;
        if (k < 5) {
          descr_list->mm[i].p[j].d[k].next =
            virt_to_phys(&(descr_list->mm[i].p[j].d[k + 1]));
        }
        descr_list->mm[i].p[j].d[k].buf = (i < 2) ?
          virt_to_phys(&(ser_data->mm_data[d & 7])) :  // Normal speed pattern.
          virt_to_phys(&(ser_data->mmd_data[d & 7]));  // Double speed pattern.
        descr_list->mm[i].p[j].d[k].hw_len = 0;
        descr_list->mm[i].p[j].d[k].status = 0;
        descr_list->mm[i].p[j].d[k].fifo_len = 0;
        d >>= 3;
      }
    }
    descr_list->mm[i].p[0].d[5].next = virt_to_phys(&(descr_list->mm[i].t1));
    descr_list->mm[i].p[1].d[5].next = virt_to_phys(&(descr_list->done[i & 1]));

    // Märklin/Motorola gap between the two packet halves.

    descr_list->mm[i].t1.sw_len = (i < 2) ?
                      TRAIN_MM_INTRA_PACKET_GAP :          // Normal speed.
                      TRAIN_MM_INTRA_PACKET_GAP / 2;       // Double speed.
    descr_list->mm[i].t1.ctrl = 0;
    descr_list->mm[i].t1.next = virt_to_phys(&(descr_list->mm[i].p[1].d[0]));
    descr_list->mm[i].t1.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->mm[i].t1.hw_len = 0;
    descr_list->mm[i].t1.status = 0;
    descr_list->mm[i].t1.fifo_len = 0;

    // Märklin/Motorola inter packet pauses. By default we use the
    // "Universal" timing, which alternates between 4.025 ms and 6.025 ms.

    descr_list->mm[i].t2.sw_len = i ? inter_gap : max_gap;
    descr_list->mm[i].t2.ctrl = 0;
    descr_list->mm[i].t2.next = virt_to_phys(&(descr_list->mm[i].p[0].d[0]));
    descr_list->mm[i].t2.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->mm[i].t2.hw_len = 0;
    descr_list->mm[i].t2.status = 0;
    descr_list->mm[i].t2.fifo_len = 0;

    // The same, but with the descriptor interrupt set.

    descr_list->mm[i].tr.sw_len = i ? inter_gap : max_gap;
    descr_list->mm[i].tr.ctrl = d_int;
    descr_list->mm[i].tr.next = virt_to_phys(&(descr_list->mm[i].p[0].d[0]));
    descr_list->mm[i].tr.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->mm[i].tr.hw_len = 0;
    descr_list->mm[i].tr.status = 0;
    descr_list->mm[i].tr.fifo_len = 0;
  }

  // Initialize DCC data packet lists.
  // The figure below shows the list structure for DCC. We initialize the list
  // with n=6 and link the remaining 6 descriptors together.
  //
  //  +-->dcc[0].p-->dcc[0].d[0]-...->dcc[0].d[n]-->dcc[0].e-->done[0]-->+
  //  |                                                                  |
  //  +---------------+<-------------------------------------dcc[0].t1<--+
  //                  |                                                  :
  //  +-->dcc[0].tr-->+                                  +<--dcc[1].tr<--+
  //  :                                                  |
  //  +-->dcc[1].t1------------------------------------->+-------------->+
  //  |                                                                  |
  //  +<--done[1]<--dcc[1].e<--dcc[1].d[n]<-...-dcc[1].d[0]<--dcc[1].p<--+

  // Data.

  for (i = 0; i < 2; i++) {
    d = TRAIN_DCC_IDLE_PACKET;
    for (j = 0; j < 12; j++) {
      descr_list->dcc[i].d[j].sw_len = get_dcc_length((unsigned char) d, j & 1);
      descr_list->dcc[i].d[j].ctrl = 0;
      if (j < 11) {
        descr_list->dcc[i].d[j].next =
          virt_to_phys(&(descr_list->dcc[i].d[j + 1]));
      }
      descr_list->dcc[i].d[j].buf = get_dcc_pattern((unsigned char) d, j & 1);
      descr_list->dcc[i].d[j].hw_len = 0;
      descr_list->dcc[i].d[j].status = 0;
      descr_list->dcc[i].d[j].fifo_len = 0;
      d >>= (j & 1) << 3;
    }
    descr_list->dcc[i].d[5].next = virt_to_phys(&(descr_list->dcc[i].e));
    descr_list->dcc[i].d[11].next = virt_to_phys(&(descr_list->dcc[i].e));

    // Preamble.

    descr_list->dcc[i].p.sw_len = 4 * get_dcc_length(0xf, 1);
    descr_list->dcc[i].p.ctrl = 0;
    descr_list->dcc[i].p.next = virt_to_phys(&(descr_list->dcc[i].d[0]));
    descr_list->dcc[i].p.buf = get_dcc_pattern(15, 1);
    descr_list->dcc[i].p.hw_len = 0;
    descr_list->dcc[i].p.status = 0;
    descr_list->dcc[i].p.fifo_len = 0;

    // End of packet.

    descr_list->dcc[i].e.sw_len = TRAIN_DCC_END_LENGTH;
    descr_list->dcc[i].e.ctrl = 0;
    descr_list->dcc[i].e.next = virt_to_phys(&(descr_list->done[i]));
    descr_list->dcc[i].e.buf = get_dcc_pattern(8, 1);
    descr_list->dcc[i].e.hw_len = 0;
    descr_list->dcc[i].e.status = 0;
    descr_list->dcc[i].e.fifo_len = 0;

    // Inter packet pause.

    descr_list->dcc[i].t1.sw_len = dcc_gap;
    descr_list->dcc[i].t1.ctrl = 0;
    descr_list->dcc[i].t1.next = virt_to_phys(&(descr_list->dcc[i].p));
    descr_list->dcc[i].t1.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->dcc[i].t1.hw_len = 0;
    descr_list->dcc[i].t1.status = 0;
    descr_list->dcc[i].t1.fifo_len = 0;

    // Inter packet pause once more, now with interrupt set.

    descr_list->dcc[i].tr.sw_len = dcc_gap;
    descr_list->dcc[i].tr.ctrl = d_int;
    descr_list->dcc[i].tr.next = virt_to_phys(&(descr_list->dcc[i].p));
    descr_list->dcc[i].tr.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->dcc[i].tr.hw_len = 0;
    descr_list->dcc[i].tr.status = 0;
    descr_list->dcc[i].tr.fifo_len = 0;
  }

  // Initialize mfx packet lists. Since there is no known "mfx idle" command
  // we create a 16-bit (+ 8 bit bad CRC) junk packet with only '0's, and
  // then tie the rest of the descriptors together. The real commands will
  // overwrite this anyway, but we want to have fully linked DMA lists from
  // the start.

  for (i = 0; i < 2; i++) {

    // Inter packet pause with interrupt set.

    descr_list->mfx[i].tr.sw_len = max_gap; // Seems to be what the Märklin
                                            // Mobile Station uses.
    descr_list->mfx[i].tr.ctrl = d_int;
    descr_list->mfx[i].tr.next = virt_to_phys(&(descr_list->mfx[i].f));
    descr_list->mfx[i].tr.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->mfx[i].tr.hw_len = 0;
    descr_list->mfx[i].tr.status = 0;
    descr_list->mfx[i].tr.fifo_len = 0;

    // Inter packet pause without interrupt.

    descr_list->mfx[i].t1.sw_len = max_gap; // Seems to be what the Märklin
                                            // Mobile Station uses.
    descr_list->mfx[i].t1.ctrl = 0;
    descr_list->mfx[i].t1.next = virt_to_phys(&(descr_list->mfx[i].f));
    descr_list->mfx[i].t1.buf = virt_to_phys(&(ser_data->mm_pause[0]));
    descr_list->mfx[i].t1.hw_len = 0;
    descr_list->mfx[i].t1.status = 0;
    descr_list->mfx[i].t1.fifo_len = 0;

    // Start flag (except the last bit).

    descr_list->mfx[i].f.sw_len = 23;
    descr_list->mfx[i].f.ctrl = 0;
    descr_list->mfx[i].f.next = virt_to_phys(&(descr_list->mfx[i].d[0]));
    descr_list->mfx[i].f.buf =
      virt_to_phys(&(ser_data->mfx_data[cv_vv10].p[0]));
    descr_list->mfx[i].f.hw_len = 0;
    descr_list->mfx[i].f.status = 0;
    descr_list->mfx[i].f.fifo_len = 0;

    // Link all data descriptors together and point them to data '0000' (bin).

    for (j = 0; j < 72; j++) {
      descr_list->mfx[i].d[j].sw_len = 23;
      descr_list->mfx[i].d[j].ctrl = 0;
      if (j < 71) {
        descr_list->mfx[i].d[j].next =
          virt_to_phys(&(descr_list->mfx[i].d[j + 1]));
      }
      descr_list->mfx[i].d[j].buf =
        virt_to_phys(&(ser_data->mfx_data[0].p[1]));
      descr_list->mfx[i].d[j].hw_len = 0;
      descr_list->mfx[i].d[j].status = 0;
      descr_list->mfx[i].d[j].fifo_len = 0;
    }

    // Point the last data descriptor to the end descriptor.
    // This is not a valid packet end, but at least stops the DMA from
    // going wild if we get here by mistake.

    descr_list->mfx[i].d[71].next = virt_to_phys(&(descr_list->mfx[i].e));

    // Create an end of packet after 24 bits.

    descr_list->mfx[i].d[6].buf =
      virt_to_phys(&(ser_data->mfx_data[cv_v100].p[1]));
    descr_list->mfx[i].d[7].next = virt_to_phys(&(descr_list->mfx[i].e));
    descr_list->mfx[i].d[7].buf = virt_to_phys(&(ser_data->mfx_data[9].p[1]));

    // The end descriptor. The end descriptor needs modification
    // of the 'sw_len' field to truncate the data pattern.

    descr_list->mfx[i].e.sw_len = 17;                 // Almost 3 mfx bits.
    descr_list->mfx[i].e.ctrl = 0;
    descr_list->mfx[i].e.next = virt_to_phys(&(descr_list->done[i]));
    descr_list->mfx[i].e.buf =
      virt_to_phys(&(ser_data->mfx_data[cv_00v1].p[0]));
    descr_list->mfx[i].e.hw_len = 0;
    descr_list->mfx[i].e.status = 0;
    descr_list->mfx[i].e.fifo_len = 0;
  }
}

//----- Initialize R_SYNC_SERIAL_PRESCALE. -----*

static inline void init_sser_prescale (void)
{
  * R_SYNC_SERIAL_PRESCALE =
      IO_STATE(R_SYNC_SERIAL_PRESCALE, clk_sel_u3, baudrate) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, word_stb_sel_u3, external) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, clk_sel_u1, baudrate) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, word_stb_sel_u1, external) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, prescaler, div1) |
      IO_STATE(R_SYNC_SERIAL_PRESCALE, warp_mode, normal) |
      IO_FIELD(R_SYNC_SERIAL_PRESCALE, frame_rate, 0) |
      IO_FIELD(R_SYNC_SERIAL_PRESCALE, word_rate, 7);
}

//----- Start DMA and serial ports. -----*

static int init_sser_port (struct train_dev * my_dev)
{
  struct train_dma_descr_pool * descr_list;
  struct train_dma_irq_reg_info info;

  descr_list = my_dev->descr_list;
  get_dma_irq_reg_info(&info, my_dev->nr);

  // Setup the serial port but do not start it yet.

  * info.sser_ctrl = sser_ctrl_val |
                     IO_STATE(R_SYNC_SERIAL1_CTRL, tr_enable, disable);

  // Register DMA and DMA irq.

  if (request_irq(info.dma_irq_nr,
                  tr_interrupt,
                  SA_INTERRUPT,
                  info.dma_irq_descr,
                  my_dev)) {
    printk(KERN_ALERT "DMA %d irq (%d) busy, give up.\n",
           info.dma_nr, info.dma_irq_nr);
    return -EBUSY;
  }
  else if (cris_request_dma(info.dma_nr,
                            info.dma_irq_descr,
                            DMA_VERBOSE_ON_ERROR,
                            info.owner)) {
    free_irq(info.dma_irq_nr, my_dev);
    printk(KERN_ALERT "DMA %d busy, give up.\n", info.dma_nr);
    return -EBUSY;
  }

  // Reset DMA and wait for reset completion.

  * info.dma_cmd = IO_STATE(R_DMA_CH4_CMD, cmd, reset);
  while((* info.dma_cmd & IO_MASK(R_DMA_CH4_CMD, cmd )) !=
         IO_STATE(R_DMA_CH4_CMD, cmd, hold));

  // Clear DMA interrupts and turn on/off interrupt mask.

  * info.clr_intr = IO_STATE(R_DMA_CH4_CLR_INTR, clr_descr, do) |
                    IO_STATE(R_DMA_CH4_CLR_INTR, clr_eop, do);
  * R_IRQ_MASK2_CLR = 2 << info.mask_bit;     // Disable eop interrupt.
  * R_IRQ_MASK2_SET = 1 << info.mask_bit;     // Enable descr interrupt.

  // Re-initialize the completion signalling.

  INIT_COMPLETION(* my_dev->cmpl);

  // Start DMA and wait until it has started to fill the FIFO.

  my_dev->descr_list_nr = 0;
	* info.dma_first = virt_to_phys(&(descr_list->mm[0].tr));
	* info.dma_cmd = IO_STATE(R_DMA_CH4_CMD, cmd, start);
  while (* info.dma_status == 0);

  // Start the serial port.

  * info.sser_ctrl = sser_ctrl_val |
                     IO_STATE(R_SYNC_SERIAL1_CTRL, tr_enable, enable);
  return 0;
}

//----- Stop DMA and serial ports. -----*

static void stop_sser_port (struct train_dev * my_dev)
{
  struct train_dma_irq_reg_info info;

  get_dma_irq_reg_info(&info, my_dev->nr);
  * info.sser_ctrl = sser_ctrl_val |
                     IO_STATE(R_SYNC_SERIAL1_CTRL, tr_enable, disable);
  * info.dma_cmd = IO_STATE(R_DMA_CH4_CMD, cmd, reset);

  // We need to reset DMA before clearing interrupts to make sure no
  // new interrupts can occur. However, interrupts should be masked
  // off and cleared before the DMA is released to a possible other user.
  // Therefore, we need to do an explicit DMA reset here even though
  // cris_free_dma() resets the DMA.

  while((* info.dma_cmd & IO_MASK(R_DMA_CH4_CMD, cmd )) !=
         IO_STATE(R_DMA_CH4_CMD, cmd, hold));
  * R_IRQ_MASK2_CLR = 3 << info.mask_bit;              // Disable interrupts.
  * info.clr_intr = IO_STATE(R_DMA_CH4_CLR_INTR, clr_descr, do) |
                    IO_STATE(R_DMA_CH4_CLR_INTR, clr_eop, do);
  free_irq(info.dma_irq_nr, my_dev);
  cris_free_dma(info.dma_nr, info.dma_irq_descr);      // deallocate DMA.
}

//----- Check validity of Märklin/Motorola command. -----*

static inline int mm_ok(size_t size)
{
  return (size == 4) ? 1 : 0;    // This command is always 4 bytes long.
}                                // All possible data values are allowed.

//----- Check validity of DCC command. -----*

static inline int dcc_ok (size_t size)
{
  return ((size >= 3) && (size <= 6)) ? 1: 0; // A DCC command is 3 to 6 bytes.
}

//----- Check validity of mfx command. -----*

static inline int mfx_ok (struct train_packet_data * data, size_t size)
{
  unsigned int i;

  i = 1;
  while (i < size) {
    if ((data->data[i] < 9) || (data->data[i] > 63)) {
      return 0;
    }
    i += 1 + ((data->data[i] + 7) >> 3);
    if (data->data[0] != train_cmd_mfx) {
      break;
    }
  }
  return (i == size);
}
//----- mfx command. -----*

static inline void mfx_cmd(struct train_packet_data * data, size_t size,
                           struct train_dev * dev)
{
  unsigned int crc;
  u32 d;                   // Data prepared for sending (after bit-stuffing).
  unsigned int bits;       // The number of prepared data bits.
  unsigned int length;     // Length of the current command, excluding flags,
                           // CRC and bit-stuffing.
  unsigned int cmd_idx;    // Byte index into the original packet data.
  int pp;                  // Pattern polarity. Initially high = 0,
                           // initially low = 1.
  int ones;                // The number of consecutive 1's, for bit-stuffing.
  unsigned int b;          // The current bit from the source data.
  unsigned int fp;         // Set if the flag pattern is not completed.
  unsigned int pidx;       // Data pattern index.
  int i, j;
  static const int np = 0x2b6996;                 // Next pattern polarity.

  pp = 1;
  cmd_idx = 1;
  d = 0;
  j = 0;
  fp = 0;
  bits = 1;                                 // There is one remaining '0'
                                            // bit from the start flags.
  while (cmd_idx < size) {                  // Loop through all packets.
    length = data->data[cmd_idx];
    crc = INIT_CRC;
    ones = 0;
    for (i = 0; i < length + 8; i++) {      // Loop through the packet bits.
      if (i < length) {
        if ((i & 7) == 0) {
          cmd_idx++;                                // A new source data byte.
        }
        b = ((signed char)
             (data->data[cmd_idx] << (i & 7))) < 0; // Get current data bit.
        if (b != (crc & 1)) {                       // Calculate CRC.
          crc ^= CRC_POLY;
        }
      }
      else {
        b = crc & 1;                                // Append CRC.
      }
      crc >>= 1;
      d |= (b << bits);
      bits++;

      // Bit-stuffing. Should be made after CRC calculation/insertion.

      ones = b ? ones + 1 : 0;
      if (ones == 8) {
        ones = 0;
        bits++;
      }

      if (bits >= 4) {

        // Insert one 4-bit pattern in the DMA list.

        if (fp) {
          pidx = (d & 8) ? cv_10v1 : cv_00v1;
          fp = 0;
        }
        else {
          pidx = d & 15;
        }
        dev->descr_list->mfx[dev->descr_list_nr].d[j].next =
          virt_to_phys(&(dev->descr_list->mfx[dev->descr_list_nr].d[j + 1]));
        dev->descr_list->mfx[dev->descr_list_nr].d[j].buf =
          virt_to_phys(&(ser_data->mfx_data[pidx].p[pp]));
        j++;
        pp ^= (np >> pidx) & 1;                   // Next polarity.
        bits -= 4;
        d >>= 4;
      }
    }

    //  Insert remaining bits and two flags.

    bits = (bits + 1) & 3;  // Number of remaining bits of the flag pattern
                            // after we have inserted what we can do here.
    if (bits == 0) {
      dev->descr_list->mfx[dev->descr_list_nr].d[j].next =
        virt_to_phys(&(dev->descr_list->mfx[dev->descr_list_nr].d[j + 1]));
      dev->descr_list->mfx[dev->descr_list_nr].d[j].buf =
        virt_to_phys(&(ser_data->mfx_data[d].p[pp]));
      j++;
      pp ^= (np >> d) & 1;                        // Next polarity.
    }
    switch (bits) {
      case 0:  pidx = cv_0vv1;               break;
      case 1:  pidx = cv_vv10;               break;
      case 2:  pidx = d ? cv_v101 : cv_v100; break;
      case 3:
      default: pidx = d | 8;                 break;
    }
    dev->descr_list->mfx[dev->descr_list_nr].d[j].next =
      virt_to_phys(&(dev->descr_list->mfx[dev->descr_list_nr].d[j + 1]));
    dev->descr_list->mfx[dev->descr_list_nr].d[j].buf =
      virt_to_phys(&(ser_data->mfx_data[pidx].p[pp]));
    j++;
    pp ^= (np >> pidx) & 1;                // Next polarity.
    pp ^= (bits >= 2);                     // Insert code violation at the end
                                           // of the pattern in these cases.
    d = (bits == 2);
    fp = (bits == 3);
    cmd_idx++;
  }

  // Insert the rest of the flags.

  switch (bits) {
    case 0:  pidx = cv_vv10; break;
    case 1:  pidx = cv_v100; break;
    case 2:  pidx = 9;       break;
    case 3:
    default: pidx = cv_00v1; break;
  }
  dev->descr_list->mfx[dev->descr_list_nr].d[j].next =
    virt_to_phys(&(dev->descr_list->mfx[dev->descr_list_nr].e));
  dev->descr_list->mfx[dev->descr_list_nr].d[j].buf =
    virt_to_phys(&(ser_data->mfx_data[pidx].p[pp]));
  pp ^= (np >> pidx) & 1;                // Next polarity.
  pp ^= (np >> bits) & 1;                // Insert code violation if needed.
  switch (bits) {
    case 0:
    pidx = 0;
    length = pp ? 5 : 6;
    break;

    case 1:
    pidx = 1;
    length = pp ? 11 : 12;
    break;

    case 2:
    pidx = cv_00v1;
    length = pp ? 18 : 17;
    break;

    case 3:
    default:
    pidx = cv_0vv1;
    length = 23;
    break;
  }
  dev->descr_list->mfx[dev->descr_list_nr].e.sw_len = (u16) length;
  dev->descr_list->mfx[dev->descr_list_nr].e.buf =
    virt_to_phys(&(ser_data->mfx_data[pidx].p[pp]));

  if (data->data[0] == train_cmd_mfx) {

    // Loop back the mfx packet to itself.

    dev->descr_list->done[dev->descr_list_nr].next =
      virt_to_phys(&(dev->descr_list->mfx[dev->descr_list_nr].t1));
  }
  else {

    // The 'train_cmd_mfx_once' command.

    // Here we create a Märklin/Motorola idle packet to send after the mfx
    // packet if no new command is given before the mfx packet is completed.

    d = TRAIN_MM_IDLE_PACKET;

    // Point the descriptors to the data.

    for (i = 0; i < 6; i++) {
      dev->descr_list->mm[dev->descr_list_nr].p[0].d[i].buf =
      dev->descr_list->mm[dev->descr_list_nr].p[1].d[i].buf =
        virt_to_phys(&(ser_data->mm_data[d & 7]));
      d >>= 3;
    }

    // Loop back the Märklin/Motorola packet to itself.

    dev->descr_list->done[dev->descr_list_nr].next =
      virt_to_phys(&(dev->descr_list->mm[dev->descr_list_nr].t2));

  }

  // Append the new list to the old one.

  dev->descr_list->done[dev->descr_list_nr ^ 1].next =
    virt_to_phys(&(dev->descr_list->mfx[dev->descr_list_nr].tr));
}

//----- DCC command. -----*

static inline void dcc_cmd(struct train_packet_data * data, size_t size,
                           struct train_dev * dev)
{
  int i, j, k;
  unsigned char ec;

  ec = 0;
  k = 0;

  // Point the descriptors to the data, and set the length.

  for (i = 1; i < size; i++) {
    ec ^= data->data[i];
    for (j = 0; j < 2; j++) {
      dev->descr_list->dcc[dev->descr_list_nr].d[k].sw_len =
        get_dcc_length(data->data[i], j);
      dev->descr_list->dcc[dev->descr_list_nr].d[k].buf =
        get_dcc_pattern(data->data[i], j);
      k++;
    }
  }

  // Add the error detection byte.

  for (j = 0; j < 2; j++) {
    dev->descr_list->dcc[dev->descr_list_nr].d[k].sw_len =
      get_dcc_length(ec, j);
    dev->descr_list->dcc[dev->descr_list_nr].d[k].buf =
      get_dcc_pattern(ec, j);
    k++;
  }

  // Build the list. Most of it is static so we only have to modify the
  // few variable parts.

  k = 5;
  for (i = 3; i < size; i++) {
    dev->descr_list->dcc[dev->descr_list_nr].d[k].next =
      virt_to_phys(&(dev->descr_list->dcc[dev->descr_list_nr].d[k + 1]));
    k += 2;
  }
  dev->descr_list->dcc[dev->descr_list_nr].d[k].next =
    virt_to_phys(&(dev->descr_list->dcc[dev->descr_list_nr].e));

  // Loop back the new list to itself.

  dev->descr_list->done[dev->descr_list_nr].next =
    virt_to_phys(&(dev->descr_list->dcc[dev->descr_list_nr].t1));

  // Append the new list to the old one.

  dev->descr_list->done[dev->descr_list_nr ^ 1].next =
    virt_to_phys(&(dev->descr_list->dcc[dev->descr_list_nr].tr));

}

//----- Märklin/Motorola train command. -----*

static inline void mm_cmd(struct train_packet_data * data,
                          struct train_dev * dev)
{
  int i;
  u32 d;

  d = (* ((u32 *) data->data)) >> 8;

  // Point the descriptors to the data.

  for (i = 0; i < 6; i++) {
    dev->descr_list->mm[dev->descr_list_nr].p[0].d[i].buf =
    dev->descr_list->mm[dev->descr_list_nr].p[1].d[i].buf =
      virt_to_phys(&(ser_data->mm_data[d & 7]));
    d >>= 3;
  }

  // Loop back the new list to itself.

  dev->descr_list->done[dev->descr_list_nr].next =
    virt_to_phys(&(dev->descr_list->mm[dev->descr_list_nr].t2));

  // Append the new list to the old one.

  dev->descr_list->done[dev->descr_list_nr ^ 1].next =
    virt_to_phys(&(dev->descr_list->mm[dev->descr_list_nr].tr));
}

//----- Märklin/Motorola accessory command. -----*

static inline void mmd_cmd(struct train_packet_data * data,
                           struct train_dev * dev)
{
  int i;
  u32 d;

  d = (* ((u32 *) data->data)) >> 8;

  // Point the descriptors to the data.

  for (i = 0; i < 6; i++) {
    dev->descr_list->mm[dev->descr_list_nr + 2].p[0].d[i].buf =
    dev->descr_list->mm[dev->descr_list_nr + 2].p[1].d[i].buf =
      virt_to_phys(&(ser_data->mmd_data[d & 7]));
    d >>= 3;
  }

  // Loop back the new list to itself.

  dev->descr_list->done[dev->descr_list_nr].next =
    virt_to_phys(&(dev->descr_list->mm[dev->descr_list_nr + 2].t2));

  // Append the new list to the old one.

  dev->descr_list->done[dev->descr_list_nr ^ 1].next =
    virt_to_phys(&(dev->descr_list->mm[dev->descr_list_nr + 2].tr));
}

//----- Märklin/Motorola change pause command. -----*

// This can be done without lock, since the write to the sw_len field
// is atomic by nature (assuming word-aligned descriptors).

static inline void mm_pause_cfg (struct train_packet_data * data,
                                 size_t size, struct train_dev * dev)
{
  u16 val;
  int i;

  if (size != 3) {
    return;
  }
  val = * ((u16 *) (&(data->data[1])));
  if (val == TRAIN_MM_UNIVERSAL_PACKET_GAP) {
    for (i = 0; i < 4; i++) {
      dev->descr_list->mm[i].t2.sw_len = (i & 1) ?
        (TRAIN_MM_INTER_PACKET_GAP - TRAIN_PACKET_DONE_PAUSE) :
        (TRAIN_MM_MAX_PACKET_GAP - TRAIN_PACKET_DONE_PAUSE);
      dev->descr_list->mm[i].tr.sw_len = (i & 1) ?
        (TRAIN_MM_INTER_PACKET_GAP - TRAIN_PACKET_DONE_PAUSE) :
        (TRAIN_MM_MAX_PACKET_GAP - TRAIN_PACKET_DONE_PAUSE);
    }
  }
  else {
    if ((val > TRAIN_MM_MAX_PACKET_GAP) || (val <= TRAIN_PACKET_DONE_PAUSE)) {
      val = TRAIN_MM_MAX_PACKET_GAP - TRAIN_PACKET_DONE_PAUSE;
    }
    else {
      val -= TRAIN_PACKET_DONE_PAUSE;
    }
    for (i = 0; i < 4; i++) {
      dev->descr_list->mm[i].t2.sw_len = val;
      dev->descr_list->mm[i].tr.sw_len = val;
    }
  }
}

//----- Check commands, and execute configuration commands. -----*

static inline int check_cmd_do_cfg (struct train_packet_data * data,
                                    size_t size, struct train_dev * dev)
{
  int done;
  done = 1;                       // Default value.
  switch (data->data[0]) {

    // Train commands.

    case train_cmd_dcc:       done = !dcc_ok(size);            break;
    case train_cmd_mm:        done = !mm_ok(size);             break;
    case train_cmd_mmd:       done = !mm_ok(size);             break;
    case train_cmd_mfx:
    case train_cmd_mfx_once:  done = !mfx_ok(data, size);      break;

    // Configuration commands, can be performed immediately.

    case train_cmd_mm_pause:  mm_pause_cfg(data, size, dev);   break;
  }
  return done;     // Returns 0 if the command needs further handling.
                   // Returns 1 if the command is completed or erroneous.
}

//------------------------------------------------------------------
// DMA interrupt handler.
//------------------------------------------------------------------

// Only run the irq we actually got (unlike the std serial driver).

static irqreturn_t tr_interrupt(int irq, void * dev_id)
{
  u32 ireg;
  struct train_dev * my_dev;

  my_dev = (struct train_dev *) dev_id;
  ireg = * R_IRQ_MASK2_RD;
  if (my_dev->nr) {
    ireg >>= IO_BITNR(R_IRQ_MASK2_RD, dma4_descr);
    * R_DMA_CH4_CLR_INTR = IO_STATE(R_DMA_CH4_CLR_INTR, clr_descr, do) |
                           IO_STATE(R_DMA_CH4_CLR_INTR, clr_eop, do);
  }
  else {
    ireg >>= IO_BITNR(R_IRQ_MASK2_RD, dma8_descr);
    * R_DMA_CH8_CLR_INTR = IO_STATE(R_DMA_CH8_CLR_INTR, clr_descr, do) |
                           IO_STATE(R_DMA_CH8_CLR_INTR, clr_eop, do);
  }
  if (ireg & 1) {
    my_dev->descr_list_nr ^= 1;      // Switch to the other DMA list.
    complete(my_dev->cmpl);
  }
  return IRQ_RETVAL(ireg & 3);
}

//------------------------------------------------------------------
// File operations.
//------------------------------------------------------------------

//----- Open function. -----*

int train_open (struct inode * my_inode, struct file * my_file)
{
  struct train_dev * my_dev;

  my_dev = container_of(my_inode->i_cdev, struct train_dev, my_cdev);
  my_file->private_data = my_dev;
  
  // Start the transmission on the first open. Protected by semaphore.

  if (down_interruptible(&my_dev->sem)) return -ERESTARTSYS;
  if (my_dev->ref_cnt == 0) {
    if (init_sser_port(my_dev)) {
      up(&my_dev->sem);
      return -EBUSY;
    }
  }
  my_dev->ref_cnt++;
  up(&my_dev->sem);

  return 0;
}

//----- Release function. -----*

int train_release (struct inode * my_inode, struct file * my_file)
{
  struct train_dev * my_dev;

  my_dev = container_of(my_inode->i_cdev, struct train_dev, my_cdev);

  // Stop the transmission on the last close. Protected by semaphore.

  if (down_interruptible(&my_dev->sem)) return -ERESTARTSYS;
  my_dev->ref_cnt--;
  if (my_dev->ref_cnt == 0) {
    stop_sser_port(my_dev);
  }
  up(&my_dev->sem);

  return 0;
}

//----- Write function. -----*

ssize_t train_write (struct file * my_file, const char __user * my_data,
                 size_t my_size, loff_t * offp)
{
  struct train_dev * my_dev;
  struct train_packet_data data;
  int i;
 
  // Ignore (silently consume) too short or too long packages.

  if ((my_size < 3) || (my_size > sizeof(struct train_packet_data))) {
    return my_size;
  }

  // Copy data to dynamic variable. No lock needed for this.
  
  if (copy_from_user(data.data, my_data, my_size)) return -EFAULT;
  
  // Get device structure.

  my_dev = my_file->private_data;

  // Check validity of train commands, and perform configuration commands.
  // Just silently consume a command if it is not OK.
  
  if (check_cmd_do_cfg (&data, my_size, my_dev)) {
    return my_size;
  }

  // If we get down to here, the command is a valid train command, and
  // should be inserted into the DMA list. But first we have to wait for
  // one of the DMA lists to be free.

  i = (int) wait_for_completion_interruptible_timeout(my_dev->cmpl,
                                                      TRAIN_WR_TIMEOUT);
  if (i <= 0) {
    return i;                          // Command interrupted or timed out.
  }

  // Then do the update.

  switch(data.data[0]) {
    case train_cmd_mfx:
    case train_cmd_mfx_once:  mfx_cmd(&data, my_size, my_dev); break;
    case train_cmd_dcc:       dcc_cmd(&data, my_size, my_dev); break;
    case train_cmd_mm:        mm_cmd(&data, my_dev);           break;
    case train_cmd_mmd:       mmd_cmd(&data, my_dev);          break;

    default:
    complete(my_dev->cmpl);   // Should never happen, but if it does,
    break;                    // this is the most sane to do.
  }

  return my_size;
}

//------------------------------------------------------------------
// Init and exit functions.
//------------------------------------------------------------------

//----- Init function. -----*

static int __init train_init(void)
{
  int i;
  int count;
  int tmp;

  // Check validity of module parameters.

  if (((sser1 | 1) != 1) || ((sser3 | 1) != 1) || ((sticky | 1) != 1)) {
    printk(KERN_ALERT "Error: train_mod: bad module parameters.\n");
    tmp = -EINVAL;
    goto init_err;
  }
  count = sser1 + sser3;
  if ((major <= 0) || (major >= 256) || (count == 0) ||
      (minor < 0) || (minor >= 256) || ((minor + count) >= 256)) {
    printk(KERN_ALERT "Error: train_mod: bad module parameters.\n");
    tmp = -EINVAL;
    goto init_err;
  }

  // Register port numbers.

  dev = MKDEV(SYNCSER_MAJOR, minor);
  tmp = register_chrdev_region(dev, count, SYNCSER_NAME);
  if (tmp) {
    printk(KERN_ALERT "Error: train_mod: failed to register chrdev region.\n");
    goto init_err;
  }
  train_init_level = train_init_region;

  // Grab the I/O port(s).


  if (sser3) {
    cris_free_io_interface(if_serial_3);
    tmp = cris_request_io_interface(if_sync_serial_3, "syncser3");
    if (tmp) {
      printk(KERN_ALERT "Error: train_mod: failed to grab I/O interface.\n");
      goto init_err;
    }
  }
  train_init_level = train_init_port1;
  if (sser1) {
    if (USB1_ALLOCATED) {

      // Deallocate usb1 if allocated. Only needed for sync_serial_1.

      cris_free_io_interface(if_usb_1);
    }
    tmp = cris_request_io_interface(if_sync_serial_1, "syncser1");
    if (tmp) {
      printk(KERN_ALERT "Error: train_mod: failed to grab I/O interface.\n");
      goto init_err;
    }
  }
  train_init_level = train_init_ports;

  // Allocate memory for the DMA descriptors and buffers.

  dma_mem_base = kmalloc(sizeof(struct train_ser_data) +
                         count * sizeof(struct train_dma_descr_pool),
                         GFP_DMA);
  if (!dma_mem_base) {
    printk(KERN_ALERT "Error: train_mod: out of memory.\n");
    tmp = -ENOMEM;
    goto init_err;
  }

  // Initialize DMA data buffers. These will only be accessed by DMA after
  // initialization.
  
  ser_data = dma_mem_base + count * sizeof(struct train_dma_descr_pool);
  init_ser_data(ser_data);
  train_init_level = train_init_started;

  // Initialize R_SYNC_SERIAL_PRESCALE.
  // There should really be a system wide shadow register for this, but
  // since there isn't, we have to overwrite the whole register.
  // Therefore, there is no way for this driver to coexist with
  // another driver for the other sync serial port. It would be difficult
  // anyway, since some settings are common to both ports.

  init_sser_prescale ();

  // Initialize each device.
  
  for (i = 0; i < count; i++) {
    train_devices[i].descr_list = dma_mem_base +
                                  i * sizeof(struct train_dma_descr_pool);
    init_descr_list(train_devices[i].descr_list);
    train_devices[i].nr = sser1 ? i : i + 1;
    train_devices[i].ref_cnt = 0;
    train_devices[i].cmpl = i ? &cmpl1 : &cmpl0;
    if (sticky) {
      tmp = init_sser_port(&train_devices[i]);
      if (tmp) goto init_err;
      train_devices[i].ref_cnt++;
      train_init_level = i ? train_init_dma1 : train_init_dma0;
    }
    init_MUTEX(&train_devices[i].sem);
    tmp = train_setup_cdev(&train_devices[i], i);
    if (tmp) {
      printk(KERN_ALERT "Error: train_mod: failed cdev init.\n");
      goto init_err;
    }
    train_init_level = train_init_cdev0;
  }
  train_init_level = train_init_success;
  printk(KERN_ALERT "Registered train_mod.\n");
  return 0;
  
  // Error returns.
  
  init_err:
  train_exit();
  return tmp;
}

//----- Exit function. -----*

static void __exit train_exit(void)
{
  int count;
  
  count = (sser1 + sser3);
  
  switch (train_init_level) {
    case train_init_success:
    if (count == 2) cdev_del(&train_devices[1].my_cdev);
    // Fall through.
    case train_init_dma1:
    if ((count == 2) && train_devices[1].ref_cnt) {
      stop_sser_port(&train_devices[1]);
    }
    // Fall through.
    case train_init_cdev0:
    cdev_del(&train_devices[0].my_cdev);
    // Fall through.
    case train_init_dma0:
    if (train_devices[0].ref_cnt) stop_sser_port(&train_devices[0]);
    // Fall through.
    case train_init_started:
    kfree(dma_mem_base);
    // Fall through.
    case train_init_ports:
    if (sser1) cris_free_io_interface(if_sync_serial_1);
    // Fall through.
    case train_init_port1:
    if (sser3) cris_free_io_interface(if_sync_serial_3);
    // Fall through.
    case train_init_region:
    unregister_chrdev_region(dev, count);
    // Fall through.
    case train_init_none:
    printk(KERN_ALERT "Goodbye from train_mod.\n");
    break;
    default:
    printk(KERN_ALERT "Error: train_mod: invalid init level.\n");
    break;
  }
}

// Register the module entry points: train_init as the init function and
// train_exit as the exit function.

module_init(train_init);
module_exit(train_exit);

//------------------------------------------------------------------
// Module information.
//------------------------------------------------------------------

// License, author, description and version strings for the module.

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Per Zander http://home.swipnet.se/perz/contact.html");
MODULE_DESCRIPTION("Sync serial driver for model train control");
MODULE_VERSION("0.5");
